**Purpose:**
This notebook should consist of our methods of detecting faces and adding effects on top on them in a video.

**Logs:**
- May 3, 2021: Write a class to wrap the face detection and mean blur effect.
- May 3, 2021: Include a new face detection model MTCNN that can leverages GPU through PyTorch.

In [1]:
!pip install face_recognition
!pip install facenet_pytorch
!pip install mmcv

Collecting face_recognition
  Downloading https://files.pythonhosted.org/packages/1e/95/f6c9330f54ab07bfa032bf3715c12455a381083125d8880c43cbe76bb3d0/face_recognition-1.3.0-py2.py3-none-any.whl
Collecting face-recognition-models>=0.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/cf/3b/4fd8c534f6c0d1b80ce0973d01331525538045084c73c153ee6df20224cf/face_recognition_models-0.3.0.tar.gz (100.1MB)
[K     |████████████████████████████████| 100.2MB 55kB/s 
[?25hBuilding wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566173 sha256=69cf24085a06ab739898d9f3d20445aabd0d70ed13f8a8b1e921961029ba05f6
  Stored in directory: /root/.cache/pip/wheels/d2/99/18/59c6c8f01e39810415c0e63f5bede7d83dfb0ffc039865465f
Successfully built face-recognition-models
Installing collected packages: face-recogni

In [2]:
%pylab inline 
import face_recognition
import cv2
import matplotlib.patches as patches
from IPython.display import clear_output
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image, ImageDraw
import imageio
from itertools import product
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import mmcv
import numpy as np
import pandas as pd
import os
# import cupy as cp
from time import time


Populating the interactive namespace from numpy and matplotlib


In [3]:
run_on_colab = True
if run_on_colab:
    from google.colab import drive
  
    drive.mount('/content/drive', force_remount=True)
    path = "/content/drive/My Drive/AdvancedPython2021"
else:
    path = os.getcwd()
os.chdir(path)
from array_mp4_conversion import array_to_mp4, mp4_to_array
file = "girl.gif"
data_path = os.path.join(path, "data", file)


Mounted at /content/drive


In [4]:
'''
This script contains the codes that we recently developed, and is used to compile a Cython code.

Steps:
1. Copy this file to a new .pyx file.
2. Compile the code by running python3 setup.py build_ext --inplace.
3. Then run test_cython.py.

'''
import torch
import os
from itertools import product
import numpy as np
import face_recognition
import cv2
import matplotlib.patches as patches
from IPython.display import clear_output
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image, ImageDraw
import imageio
from itertools import product
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import mmcv
import numpy as np
import pandas as pd
import os
from time import time
from array_mp4_conversion import array_to_mp4, mp4_to_array
from numba import jit, prange, cuda
from collections import defaultdict
from scipy.signal import convolve2d
# cimport numpy as np
# ctypedef np.uint8_t D_TYPE

path = os.getcwd()

class video_transformer_base:
    '''
    This is the base of video_transformer, containing basic information about the video.
    '''
    def __init__(self,
                path, 
                save_path, 
                file_name, 
                device='cpu',
                display=False):
        
        self.video_path = os.path.join(path, "data", file_name)
        self.video_array, self.fps = mp4_to_array(self.video_path)
        self.display = display
        self.save_path = save_path
        self.file_name = file_name
        self.num_frames = 0
        if device == 'cpu':
            self.device = 'cpu'
        else:
            self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    
    def main_transformation(self, 
                            face_detection_model, 
                            filter_effect):
        '''
        For each frame, do:
        1. detect the face;
        2. apply the filter;
        3. save the processed frame.
        '''                    
        video_capture = cv2.VideoCapture(self.video_path)
        frame_count = 0
        output_frames = []
        while video_capture.isOpened():    
            # Grab a single frame of video
            ret, frame = video_capture.read()
            # try:
            #   frame = torch.from_numpy(frame).to(self.device)
            # except:
            #   print(ret)

            # Bail out when the video file ends
            if not ret:
                video_capture.release()
                break
            
            frame_count += 1
            # print(frame_count)
            # print(type(frame))
            # detect faces
            if face_detection_model != "mtcnn":
                face_locations = self.face_detection(frame, 
                                                     model=face_detection_model)
            else:
                face_locations = self.face_detection_mtcnn(frame)
            # print(f"{len(face_locations)} face(s) detected at frame {frame_count}.")

            # add effect
            after_effect_frame = filter_effect(frame, face_locations)

            # print(frame_count)
            if self.display and frame_count % 2 == 0:
               # If faces were found, we will mark it on frame with blue dots
                for face_location in face_locations:
                    top, right, bottom, left = face_location
                    cv2.rectangle(after_effect_frame,(left, top), (right, bottom), (0, 0, 255), 2)
                plt.imshow(after_effect_frame)
                plt.show()
                clear_output(wait=True)
            # im = Image.fromarray(after_effect_frame)
            # im.save(os.path.join(self.save_path, f"{self.file_name}_prcs_{frame_count}.png"))
            output_frames.append(after_effect_frame)
        self.num_frames = frame_count
        self.des_arr = np.array(output_frames)

    def face_detection(self, frame):
        '''
        Face detection with package face_recognition.
        Models includes: svm, knn, cnn.
        Currently fixed as model='svm' because model ='cnn' is slow.
        '''
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rgb_frame = frame[:, :, ::-1]
        face_locations = face_recognition.face_locations(rgb_frame, model='svm')
        # print(f"{len(face_locations)} face(s) detected.")
        
        return face_locations
    def face_detection_mtcnn(self, frame):
        '''
        Face detection with package facenet_pytorch.
        MTCNN implemented in Pytorch, so also support CUDA.
        '''       

        mtcnn = MTCNN(keep_all=True, device=self.device)
        boxes, _ = mtcnn.detect(frame)
        
        if boxes is None:
            boxes = []
            
        boxes = np.array([[box[1], box[2], box[3], box[0]] for box in boxes]).astype(np.int)
        # print(f"{len(boxes)} face(s) detected.")
        return boxes
    def oil_effect(self, frame):
        pass
    
    def negative_effect(self, frame, locations):
        des_img = np.copy(frame)
        try:
            for location in locations:
                t_, r_, b_, l_ = location.astype(int)

                des_img[t_:b_,l_:r_] = 255 - frame[t_:b_,l_:r_]
        except:
            pass
        
        return des_img

    def mean_blur(self, frame, locations, radius=5):
        '''
        Apply simple mosaic effect to specified regions. 
        '''
        k = 1 / (radius*2+1)**2
        des_img = np.copy(frame)
        height, width, _ = des_img.shape
        # try:
        for location in locations:
            top, right, bottom, left = location
            t_ = max(top+radius,0)
            b_ = min(bottom-radius, height)
            l_ = max(left+radius,0)
            r_ = min(right-radius, width)
            if t_ >= b_ or l_ >= r_:
                continue

            for i, j in product(range(t_, b_), range(l_, r_)):
                kernel = frame[i-radius:i+radius+1, j-radius:j+radius+1, :]
                sumed = np.sum(kernel, axis = (0,1)) * k
                des_img[i, j] = sumed.astype(np.uint8)
        # except:
        #     pass
        
        return des_img    
    
    # construct transformed gif
    def output(self):
        images = []
        frames_count = list(range(1,self.num_frames))
        
        for i in frames_count:
            try:
                images.append(imageio.imread(
                    os.path.join(self.save_path, f"{self.file_name}_prcs_{i}.png")))
            except:
                pass
        imageio.mimsave(os.path.join(self.save_path, f"{self.file_name}_prcs.gif"), images)
    
    def write_to_video(self, output_filename):
        '''
        Write out the video with filter to mp4.
        '''
        array_to_mp4(output_filename, self.des_arr, self.fps)

class video_transformer_parallel(video_transformer_base):
    '''
    This version views the video as an array for easier parallelization.
    '''
    def __init__(self, path, save_path, file_name, device='cpu',display=False):
        video_transformer_base.__init__(self, path, save_path, file_name, 
                                        device, display)
        
        self.locations = None
        self.des_arr = None

        torch.from_numpy(self.video_array).to(self.device)
        
    def filter_on_video_cuda(self, img_blur_cuda, radius=10, face_detection_model = 'mtcnn'):
        blocksize = (32,32)
        k = 1 / (2*radius+1)**2
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        np.save("frame_test.out", self.des_arr[20])
        np.save("locations_tests.out", self.locations[20])
        for i in range(frame_size):
            for location in self.locations[i]:
                top, right, bottom, left = location
                face = np.ascontiguousarray(self.des_arr[i, top:bottom+1, left:right+1, :])
                gridsize = (face.shape[0]//blocksize[0]+1, face.shape[1]//blocksize[1]+1)
                blur_face = np.empty_like(face)
                img_blur_cuda[gridsize, blocksize](face, blur_face, k, radius)
                self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face
    
    def filter_on_video_cv2(self, cv2_blur, radius=10, face_detection_model = 'mtcnn'):
        k = 1 / (2*radius+1)**2
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        
        for i in range(frame_size):
            for location in self.locations[i]:
                top, right, bottom, left = location
                face = self.des_arr[i, top:bottom+1, left:right+1, :]
                blur_face = np.empty_like(face)
                cv2_blur(src=face, dst=blur_face, ksize=(radius,radius))
                self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face   

    def filter_on_video_face_only(self, filter_func, radius=10, face_detection_model = 'mtcnn'):
        k = 1 / (2*radius+1)**2
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)

        for i in range(frame_size):
            for location in self.locations[i]:
                top, right, bottom, left = location
                face = self.des_arr[i, top:bottom+1, left:right+1, :]
                blur_face = np.empty_like(face)
                blur_face = filter_func(face=face, blur_face=blur_face, radius=radius)
                self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face   

    # @jit(nopython=False, parallel=True)
    def mean_blur_git(self, image, des_img, locations, radius):
        '''
        mean_blur function with a source and destination image, the logic remains the same.
        '''
        # radius has to be even
        if len(locations) == 0:
            print("No faces")
            return
        # print(len(locations))
        k = 1 / (radius*2+1)**2
        height, width, _ = des_img.shape
        for (top, right, bottom, left) in locations:
            t_ = max(top+radius,0)
            b_ = min(bottom-radius, height)
            l_ = max(left+radius,0)
            r_ = min(right-radius, width)

            if t_ >= b_ or l_ >= r_:
                continue
          
            for i, j in product(range(t_, b_), range(l_, r_)):
                summed = np.sum(image[i-radius:i+radius+1, j-radius:j+radius+1, :], axis = (0,1), dtype=np.uint32)
                # print(summed)
                # sumed = np.sum(np.sum(kernel, axis= 0, dtype=np.uint32), axis=0)
                # sumed = np.sum(kernel, axis=(0,1), dtype=np.uint32)
                des_img[i, j, :] = (summed * k).astype(np.uint8)
            
            cv2.rectangle(des_img, (left, top), (right, bottom), (0, 0, 255), 2)
    
    def mean_blur_convolution_face_only(self, face, blur_face, radius):
        '''
        This function only takes in the face portion, intended to decrease the amount of data transfer.
        '''
        n_neighbor = radius*2+1
        kernel = np.ones((n_neighbor, n_neighbor))
        red = convolve2d(face[:,:,0], kernel, 'same')
        green = convolve2d(face[:,:,1], kernel, 'same')
        blue = convolve2d(face[:,:,2], kernel, 'same')
        convol_area = (np.stack([red, green, blue], axis=2)  / (n_neighbor**2)).astype(np.uint8)

        return convol_area

    def mean_blur_convolution(self, image, des_img, locations, radius):
        '''
        Utilized Scipy convolution to perform blurring effect.
        '''
        if len(locations) == 0:
              print("No faces")
              return
          # print(len(locations))
        n_neighbor = radius*2+1
        height, width, _ = des_img.shape
        kernel = np.ones((n_neighbor, n_neighbor))
        for (top, right, bottom, left) in locations:
            t_ = max(top+radius,0)
            b_ = min(bottom-radius, height)
            l_ = max(left+radius,0)
            r_ = min(right-radius, width)

            if t_ >= b_ or l_ >= r_:
                continue
            sample_area = image[t_-radius:b_+radius+1, l_-radius:r_+radius+1,:].astype(np.uint8)
            
            # np.save("face.out", sample_area)
            red = convolve2d(sample_area[:,:,0], kernel, 'same')
            green = convolve2d(sample_area[:,:,1], kernel, 'same')
            blue = convolve2d(sample_area[:,:,2], kernel, 'same')

            convol_area = np.stack([red, green, blue], axis=2)

            des_img[t_-radius:b_+radius+1, l_-radius:r_+radius+1,:] = (convol_area / (n_neighbor**2)).astype(np.uint8)
            # cv2.rectangle(des_img, (left, top), (right, bottom), (0, 0, 255), 2)            
                    
                    
    def get_face_locations(self, face_detection_model):
        '''
        get face_locations on entire video as an array.
        '''
        des_arr = torch.from_numpy(self.video_array.copy()).to(self.device)
        
        if face_detection_model != 'mtcnn':
            locations = list(map(self.face_detection, des_arr))
        else:
            locations = list(map(self.face_detection_mtcnn, des_arr))    

        return locations
    
    
    @jit(nopython=False, parallel=True)
    def filter_on_video(self, filter_func, face_detection_model = 'mtcnn', radius = 10):
        '''
        Apply filter on the video.
        '''
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        radius_list = [radius] * frame_size

        list(map(filter_func, self.video_array, self.des_arr, self.locations, radius_list))
#         for i in prange(frame_size):
#             filter_func(self.video_array[i], self.des_arr[i], self.locations[i], radius)


In [75]:
from multiprocessing import Process, Queue,Lock
class video_transformer_multiprocessing(video_transformer_parallel):
    '''
    This module aims to use multiprocessing to speed up the blurring.
    '''
    def __init__(self, path, save_path, file_name,N, device='cpu',display=False):
        video_transformer_parallel.__init__(self, path, save_path, file_name, 
                                        device, display)
        self.N = N
    def mean_blur_one_location(self, locations, i):
        # blur_faces = []
        for location in locations:
            # print(location)
            top, right, bottom, left = location
            face = self.des_arr[i,top:bottom+1, left:right+1, :]
            blur_face = np.empty_like(face)
            blur_face = self.mean_blur_convolution_face_only(face=face, blur_face=blur_face, radius=self.radius)
            self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face
        return self.des_arr
    
    def mean_blur_one_frame(self, frame, locations):
        for location in locations:
            # print(location)
            top, right, bottom, left = location
            face = frame[top:bottom+1, left:right+1, :]
            blur_face = np.empty_like(face)
            blur_face = self.mean_blur_convolution_face_only(face=face, blur_face=blur_face, radius=self.radius)
            frame[top:bottom+1, left:right+1, :] = blur_face
        return frame
    def mean_blur_some_frame(self, frames, list_locations, queue, idx):
        frames_update = []
        for i in range(len(frames)):
          frame = frames[i]
          locations_=list_locations[i]
          frame_update = self.mean_blur_one_frame(frame, locations_)
          frames_update.append(frame_update)
        queue.put((idx,frames_update))
        # return frames_update

    def filter_on_video_mult(self, filter_func, radius=10, face_detection_model = 'mtcnn'):
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        self.radius = radius

        frames_portions = list(np.array_split(self.des_arr, self.N))
        locations_portions  = list(np.array_split(self.locations, self.N))
        id = list(range(self.N))
        q = Queue()
        jobs = []
        rets = []
        # args = [(self.des_arr[i], self.locations[i]) for i in range(frame_size)]
        # with Pool(self.N) as p:
        #     results = p.starmap(self.mean_blur_one_frame, args)
        lock = Lock()

        # for i in range(frame_size):
        #   frame = self.des_arr[i]
        #   locations_portions = list(np.array_split(self.locations[i], self.N))
        for i in range(self.N):
            p = Process(target=self.mean_blur_some_frame, args=(frames_portions[i],locations_portions[i], q, i))
            # p = Process(target=self.mean_blur_one_frame, args=(frame,locations_portions[i],q))
            p.Daemon = True
            jobs.append(p)
            p.start()
        for p in jobs:
            ret = q.get() # will block
            rets.append(ret)
        for p in jobs:
            p.join()
        # print(len(rets))
        rets.sort(key=lambda x:x[0])

        # print([ret[0] for ret in rets_sorted])
        rets = np.array([ret[1] for ret in rets])
        
        self.des_arr = np.concatenate(rets).astype(np.uint8)
        # print(len(rets))
        print(self.des_arr.shape)
            



In [40]:
# frame_test = np.load("frame_test.out.npy").astype(np.uint8)
# locations_test = np.load("locations_tests.out.npy")

In [84]:
# global des_arr 
# des_arr= frame_test.copy()
# N = 2
# chunk = list(np.array_split(locations_test, N))
# import itertools
# def mean_blur_convolution_face_only(face, blur_face, radius):
#     '''
#     This function only takes in the face portion, intended to decrease the amount of data transfer.
#     '''
#     n_neighbor = radius*2+1
#     kernel = np.ones((n_neighbor, n_neighbor))
#     red = convolve2d(face[:,:,0], kernel, 'same')
#     green = convolve2d(face[:,:,1], kernel, 'same')
#     blue = convolve2d(face[:,:,2], kernel, 'same')
#     convol_area = (np.stack([red, green, blue], axis=2)  / (n_neighbor**2)).astype(np.uint8)
#     return convol_area
# def mean_blur_one_location(locations):
#     # blur_faces = []
#     for i in range(len(locations)):
#         top, right, bottom, left = locations[i]
#         face = des_arr[top:bottom+1, left:right+1, :]
#         blur_face = np.empty_like(face)
#         blur_face = mean_blur_convolution_face_only(face=face, blur_face=blur_face, radius=10)
#         des_arr[top:bottom+1, left:right+1, :] = blur_face


# list(map(mean_blur_one_location, chunk))

[None, None]

In [None]:
# img = Image.fromarray(des_arr)
# img

In [76]:
start = time()
case_9 = video_transformer_multiprocessing(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'hamilton_short.mp4',
                           N=4,
                           display=False,
                           device='gpu')
case_9.filter_on_video_mult(case_9.mean_blur_convolution_face_only)
case_9.write_to_video("hamilton_short_gpu_conv_face_only_mult.mp4")
print(f"Time used {time() - start}s")

  return array(a, dtype, copy=False, order=order)


(834, 360, 640, 3)
Time used 63.41239666938782s


**1. Base**

peds_shorts.mp4 w/ Colab CPU: 
- 1st: 65.86514019966125s.

Peds_short.mp4 w/ Colab GPU
- 1st: 32.44067120552063s.


In [None]:
ts = time()
case_1 = video_transformer_base(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False)
case_1.main_transformation("mtcnn", case_1.mean_blur)
case_1.write_to_video()
print(f"Time used {time() - ts}s.")

Time used 162.5575568675995s.


In [None]:
ts = time()
case_2 = video_transformer_base(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
case_2.main_transformation("mtcnn", case_2.mean_blur)
case_2.write_to_video()
print(f"Time used {time() - ts}s.")

Time used 55.120919942855835s.


In [None]:
ts = time()
case_3 = video_transformer_base(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'hamilton_clip.mp4',
                           display=False,
                           device='gpu')
case_3.main_transformation("mtcnn", case_3.negative_effect)
case_3.output()
print(f"Time used {time() - ts}s.")

**2. Parallel**

Speeds of Peds_short:
  - w/ GPU: 33s
  - w/ CPU: 155s
  - w/ GPU + convolution blurring: 17s
  - w/ GPU + CUDA: 11s

Issues:
  - Less faced detected:
    - Solved by activating iterator ```list(map())```.
  - ```array_detection()```
    - RAM not enough
    - Slower
  - ```@jit mean_blur_jit()```
    - showing only one location at one time.
    - Add the if ```len()==0``` return statement, still doesn'work.


In [None]:
ts = time()
case_4 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds_short.mp4',
                           display=False,
                           device='gpu')
case_4.filter_on_video(case_4.mean_blur_git)
case_4.write_to_video("peds_short_gpu_parallel.mp4")
print(f"Time used {time() - ts}s.")

In [None]:
ts = time()
case_5 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds_short.mp4',
                           display=False,
                           device='gpu')
case_5.filter_on_video(case_5.mean_blur_convolution)
case_5.write_to_video("peds_short_gpu_parallel_conv.mp4")
print(f"Time used {time() - ts}s.")

In [None]:
ts = time()
case_6 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds_short.mp4',
                           display=False,
                           device='gpu')
case_6.filter_on_video_cuda()
case_6.write_to_video("peds_short_gpu_parallel_cuda.mp4")
print(f"Time used {time() - ts}s.")

In [None]:
ts = time()
case_5 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='cpu')
case_5.filter_on_video(case_5.mean_blur)
case_5.write_to_video(output_filename = case_5.file_name+"_cpu_parallel.mp4")
print(f"Time used {time() - ts}s.")

In [38]:
ts = time()
case_4 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'hamilton_short.mp4',
                           display=False,
                           device='gpu')
case_4.filter_on_video(case_4.mean_blur_convolution)
case_4.write_to_video("hamilton_short_gpu_parallel.mp4")
print(f"Time used {time() - ts}s.")

No faces
Time used 16.166011333465576s.


In [113]:
start = time()
case_8 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
case_8.filter_on_video_face_only(case_8.mean_blur_convolution_face_only)
case_8.write_to_video("peds_gpu_conv_face_only.mp4")
print(f"Time used {time() - start}s")

Time used 35.428462266922s


In [52]:
@cuda.jit
def img_blur_cuda(img, des_img, k, radius):
    '''
    numba cuda version of blurring algorithm
    '''
    i, j = cuda.grid(2)

    rows, columns, channel = img.shape
    if i >= rows or j >= columns:
        return

    ra = rows - radius
    ca = columns - radius
    if i < radius or j < radius or i >= ra or j >= ca:
        des_img[i, j, 0] = img[i, j, 0]
        des_img[i, j, 1] = img[i, j, 1]
        des_img[i, j, 2] = img[i, j, 2]
        return

    r = 0
    g = 0
    b = 0
    for x in range(-radius, radius + 1):
        for y in range(-radius, radius + 1):
            i_x = i + x
            j_y = j + y
            r += img[i_x, j_y, 0] * k
            g += img[i_x, j_y, 1] * k
            b += img[i_x, j_y, 2] * k
    des_img[i, j, 0] = r
    des_img[i, j, 1] = g
    des_img[i, j, 2] = b

In [53]:
start = time()
case_cuda = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'hamilton_short.mp4',
                           display=False,
                           device='gpu')
case_cuda.filter_on_video_cuda(img_blur_cuda)
case_cuda.write_to_video("hamilton_short_cuda_gpu.mp4")
print(f"Time used {time() - start}s")

Time used 37.73459982872009s


In [94]:
frame_test = np.load("frame_test.out.npy").astype(np.uint8)
locations_test = np.load("locations_tests.out.npy")

In [95]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [47]:
%%cython
import cv2
from scipy.signal import convolve2d
import numpy as np
cimport numpy as np
ctypedef np.npy_intp SIZE_t
# cimport cython
# @cython.boundscheck(False)
def mean_blur_convolution_cython(np.ndarray[np.uint8_t, ndim=3] image, 
                                 np.ndarray[np.uint8_t, ndim=3] des_img, 
                                 np.ndarray locations, 
                                 double radius):
    '''
    Utilized Scipy convolution to perform blurring effect.
    '''
    if len(locations) == 0:
          # print("No faces")
          return
    cdef int n_neighbor = np.int32((radius)*2+1)
    cdef int height = des_img.shape[0]
    cdef int width = des_img.shape[1]
    cdef np.ndarray[int, ndim = 2]kernel = np.ones((n_neighbor, n_neighbor),dtype=np.int32)
    cdef int t_, b_, l_, r_, bound_top, bound_bottom, bound_left, bound_right

    for (top, right, bottom, left) in locations:
        t_ = np.int32(max(top+radius,0))
        b_ = np.int32(min(bottom-radius, height))
        l_ = np.int32(max(left+radius,0))
        r_ = np.int32(min(right-radius, width))

        bound_top = np.int32(t_-radius)
        bound_bottom = np.int32(b_+radius+1)
        bound_left = np.int32(l_-radius)
        bound_right = np.int32(r_+radius+1)

        if t_ >= b_ or l_ >= r_:
            continue
        
        sample_area = image[bound_top:bound_bottom, 
                            bound_left:bound_right,:]
        
        # np.save("face.out", sample_area)
        red = convolve2d(sample_area[:,:,np.int32(0)], kernel, 'same')
        green = convolve2d(sample_area[:,:,np.int32(1)], kernel, 'same')
        blue = convolve2d(sample_area[:,:,np.int32(2)], kernel, 'same')

        convol_area = np.stack([red, green, blue], axis=2)

        des_img[bound_top:bound_bottom, 
               bound_left:bound_right,:] = (convol_area / (n_neighbor**2)).astype(np.uint8)
        cv2.rectangle(des_img, (left, top), (right, bottom), (0, 0, 255), 2)  

In [54]:
ts = time()
case_5 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'hamilton_short.mp4',
                           display=False,
                           device='gpu')
case_5.filter_on_video(mean_blur_convolution_cython)
case_5.write_to_video("hamilton_short_gpu_parallel_cython.mp4")
print(f"Time used {time() - ts}s.")

Time used 69.33301115036011s.


In [64]:
start = time()
case_cv2 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds_short.mp4',
                           display=False,
                           device='gpu')
case_cv2.filter_on_video_cv2(cv2.blur)
case_cv2.write_to_video("peds_short_cuda_gpu_cv2.mp4")
print(f"Time used {time() - start}s")

Time used 10.602014064788818s
