**Purpose:**
This notebook consists of profiling of the python applications and scripts that we developed for the project. 

In [None]:
!pip install face_recognition
!pip install facenet_pytorch
!pip install mmcv
!pip install line_profiler

In [2]:
%pylab inline 
import face_recognition
import cv2
import matplotlib.patches as patches
from IPython.display import clear_output
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image, ImageDraw
import imageio
from itertools import product
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import mmcv
import numpy as np
import pandas as pd
import os
# import cupy as cp
from time import time
%load_ext line_profiler
import warnings
warnings.filterwarnings("ignore")


Populating the interactive namespace from numpy and matplotlib


In [3]:
run_on_colab = True
if run_on_colab:
    from google.colab import drive
  
    drive.mount('/content/drive', force_remount=True)
    path = "/content/drive/My Drive/AdvancedPython2021"
else:
    path = os.getcwd()
os.chdir(path)
from array_mp4_conversion import array_to_mp4, mp4_to_array
file = "girl.gif"
data_path = os.path.join(path, "data", file)


Mounted at /content/drive


In [4]:
import torch
import os
from itertools import product
import numpy as np
import face_recognition
import cv2
import matplotlib.patches as patches
from IPython.display import clear_output
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image, ImageDraw
import imageio
from itertools import product
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import mmcv
import numpy as np
import pandas as pd
import os
from time import time
from array_mp4_conversion import array_to_mp4, mp4_to_array
from numba import jit, prange, cuda
from collections import defaultdict
from scipy.signal import convolve2d
# cimport numpy as np
# ctypedef np.uint8_t D_TYPE

path = os.getcwd()

class video_transformer_base:
    '''
    This is the base of video_transformer, containing basic information about the video.
    '''
    def __init__(self,
                path, 
                save_path, 
                file_name, 
                device='cpu',
                display=False):
        
        self.video_path = os.path.join(path, "data", file_name)
        self.video_array, self.fps = mp4_to_array(self.video_path)
        self.display = display
        self.save_path = save_path
        self.file_name = file_name
        self.num_frames = 0
        if device == 'cpu':
            self.device = 'cpu'
        else:
            self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    
    def main_transformation(self, 
                            face_detection_model, 
                            filter_effect):
        '''
        For each frame, do:
        1. detect the face;
        2. apply the filter;
        3. save the processed frame.
        '''                    
        video_capture = cv2.VideoCapture(self.video_path)
        frame_count = 0
        output_frames = []
        while video_capture.isOpened():    
            # Grab a single frame of video
            ret, frame = video_capture.read()

            # Bail out when the video file ends
            if not ret:
                video_capture.release()
                break
            
            frame_count += 1

            # detect faces
            if face_detection_model != "mtcnn":
                face_locations = self.face_detection(frame, 
                                                     model=face_detection_model)
            else:
                face_locations = self.face_detection_mtcnn(frame)

            # add effect
            after_effect_frame = filter_effect(frame, face_locations)

            if self.display and frame_count % 2 == 0:
               # If faces were found, we will mark it on frame with blue dots
                for face_location in face_locations:
                    top, right, bottom, left = face_location
                    cv2.rectangle(after_effect_frame,(left, top), (right, bottom), (0, 0, 255), 2)
                plt.imshow(after_effect_frame)
                plt.show()
                clear_output(wait=True)

            output_frames.append(after_effect_frame)
        self.num_frames = frame_count
        self.des_arr = np.array(output_frames)

    def face_detection(self, frame):
        '''
        Face detection with package face_recognition.
        Models includes: svm, knn, cnn.
        Currently fixed as model='svm' because model ='cnn' is slow.
        '''
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rgb_frame = frame[:, :, ::-1]
        face_locations = face_recognition.face_locations(rgb_frame, model='svm')
        # print(f"{len(face_locations)} face(s) detected.")
        
        return face_locations
    def face_detection_mtcnn(self, frame):
        '''
        Face detection with package facenet_pytorch.
        MTCNN implemented in Pytorch, so also support CUDA.
        '''       

        mtcnn = MTCNN(keep_all=True, device=self.device)
        boxes, _ = mtcnn.detect(frame)
        
        if boxes is None:
            boxes = []
            
        boxes = np.array([[box[1], box[2], box[3], box[0]] for box in boxes]).astype(np.int)
        # print(f"{len(boxes)} face(s) detected.")
        return boxes
    def oil_effect(self, frame):
        '''
        Please refer to unused_oil_effect.py for implementation.
        '''
        
        pass
    
    def negative_effect(self, frame, locations):
        '''
        Apply negative filter effect to target locations.
        '''
        des_img = np.copy(frame)
        try:
            for location in locations:
                t_, r_, b_, l_ = location.astype(int)

                des_img[t_:b_,l_:r_] = 255 - frame[t_:b_,l_:r_]
        except:
            pass
        
        return des_img

    def mean_blur(self, frame, locations, radius=5):
        '''
        Apply simple mosaic effect to specified regions. 
        '''
        k = 1 / (radius*2+1)**2
        des_img = np.copy(frame)
        height, width, _ = des_img.shape

        for location in locations:
            top, right, bottom, left = location
            t_ = max(top+radius,0)
            b_ = min(bottom-radius, height)
            l_ = max(left+radius,0)
            r_ = min(right-radius, width)
            if t_ >= b_ or l_ >= r_:
                continue

            for i, j in product(range(t_, b_), range(l_, r_)):
                kernel = frame[i-radius:i+radius+1, j-radius:j+radius+1, :]
                sumed = np.sum(kernel, axis = (0,1)) * k
                des_img[i, j] = sumed.astype(np.uint8)

        
        return des_img    
    
    def write_to_video(self, output_filename):
        '''
        Write out the video with filter to mp4.
        '''
        array_to_mp4(output_filename, self.des_arr, self.fps)

class video_transformer_parallel(video_transformer_base):
    '''
    This version views the video as an array for easier parallelization.
    '''
    def __init__(self, path, save_path, file_name, device='cpu',display=False):
        video_transformer_base.__init__(self, path, save_path, file_name, 
                                        device, display)
        
        self.locations = None
        self.des_arr = None

        torch.from_numpy(self.video_array).to(self.device)    
   
    def filter_on_video(self, filter_func, face_detection_model = 'mtcnn', radius = 10):
        '''
        Apply filter on the video.
        '''
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        radius_list = [radius] * frame_size

        list(map(filter_func, self.video_array, self.des_arr, self.locations, radius_list))
        
    def filter_on_video_cuda(self, img_blur_cuda, radius=10, face_detection_model = 'mtcnn'):
        '''
        Filter on entire video using CUDA for blurring. It specifies block size and controls bounds.
        '''
        blocksize = (32,32)
        k = 1 / (2*radius+1)**2
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        np.save("frame_test.out", self.des_arr[20])
        np.save("locations_tests.out", self.locations[20])
        for i in range(frame_size):
            for location in self.locations[i]:
                top, right, bottom, left = location
                face = np.ascontiguousarray(self.des_arr[i, top:bottom+1, left:right+1, :])
                gridsize = (face.shape[0]//blocksize[0]+1, face.shape[1]//blocksize[1]+1)
                blur_face = np.empty_like(face)
                img_blur_cuda[gridsize, blocksize](face, blur_face, k, radius)
                self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face
    
    def filter_on_video_cv2(self, cv2_blur, radius=10, face_detection_model = 'mtcnn'):
        '''
        Filter on entire video using CV2.blur.
        '''
        k = 1 / (2*radius+1)**2
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        
        for i in range(frame_size):
            for location in self.locations[i]:
                top, right, bottom, left = location
                face = self.des_arr[i, top:bottom+1, left:right+1, :]
                blur_face = np.empty_like(face)
                cv2_blur(src=face, dst=blur_face, ksize=(radius,radius))
                self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face   

    def filter_on_video_face_only(self, filter_func, radius=10, face_detection_model = 'mtcnn'):
        '''
        Filter on the entire video, but the filter function only take in the face regions 
        instead of the entire frame to reduce data size.
        '''
        k = 1 / (2*radius+1)**2
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)

        for i in range(frame_size):
            for location in self.locations[i]:
                top, right, bottom, left = location
                face = self.des_arr[i, top:bottom+1, left:right+1, :]
                blur_face = np.empty_like(face)
                blur_face = filter_func(face=face, blur_face=blur_face, radius=radius)
                self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face   
    
    def mean_blur_convolution(self, image, des_img, locations, radius):
        '''
        Utilized Scipy convolution to perform blurring effect on faces in one frame.
        '''
        if len(locations) == 0:
              print("No faces")
              return
          # print(len(locations))
        n_neighbor = radius*2+1
        height, width, _ = des_img.shape
        kernel = np.ones((n_neighbor, n_neighbor))
        for (top, right, bottom, left) in locations:
            t_ = max(top+radius,0)
            b_ = min(bottom-radius, height)
            l_ = max(left+radius,0)
            r_ = min(right-radius, width)

            if t_ >= b_ or l_ >= r_:
                continue
            sample_area = image[t_-radius:b_+radius+1, l_-radius:r_+radius+1,:].astype(np.uint8)
            
            # np.save("face.out", sample_area)
            red = convolve2d(sample_area[:,:,0], kernel, 'same')
            green = convolve2d(sample_area[:,:,1], kernel, 'same')
            blue = convolve2d(sample_area[:,:,2], kernel, 'same')

            convol_area = np.stack([red, green, blue], axis=2)

            des_img[t_-radius:b_+radius+1, l_-radius:r_+radius+1,:] = (convol_area / (n_neighbor**2)).astype(np.uint8)
            # cv2.rectangle(des_img, (left, top), (right, bottom), (0, 0, 255), 2)            
                    
    def mean_blur_convolution_face_only(self, face, blur_face, radius):
        '''
        Apply blurring with scipy.convolve2d() to faces only.
        '''
        n_neighbor = radius*2+1
        kernel = np.ones((n_neighbor, n_neighbor))
        red = convolve2d(face[:,:,0], kernel, 'same')
        green = convolve2d(face[:,:,1], kernel, 'same')
        blue = convolve2d(face[:,:,2], kernel, 'same')
        convol_area = (np.stack([red, green, blue], axis=2)  / (n_neighbor**2)).astype(np.uint8)

        return convol_area
                    
    def get_face_locations(self, face_detection_model):
        '''
        Get a list of face_locations on entire video.
        '''
        des_arr = torch.from_numpy(self.video_array.copy()).to(self.device)
        
        if face_detection_model != 'mtcnn':
            locations = list(map(self.face_detection, des_arr))
        else:
            locations = list(map(self.face_detection_mtcnn, des_arr))    

        return locations



In [5]:
from multiprocessing import Process, Queue, Lock
class video_transformer_multiprocessing(video_transformer_parallel):
    '''
    This module aims to use multiprocessing to speed up the blurring.
    '''
    def __init__(self, path, save_path, file_name,N, device='cpu',display=False):
        video_transformer_parallel.__init__(self, path, save_path, file_name, 
                                        device, display)
        self.N = N
    def mean_blur_one_location(self, locations, i):
        '''
        Apply the blurring function to only one face.
        '''
        for location in locations:
            top, right, bottom, left = location
            face = self.des_arr[i,top:bottom+1, left:right+1, :]
            blur_face = np.empty_like(face)
            blur_face = self.mean_blur_convolution_face_only(face=face, blur_face=blur_face, radius=self.radius)
            self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face
        return self.des_arr
    
    def mean_blur_one_frame(self, frame, locations):
        '''
        Apply the blurring function to all faces within one frame.
        '''
        for location in locations:
            # print(location)
            top, right, bottom, left = location
            face = frame[top:bottom+1, left:right+1, :]
            blur_face = np.empty_like(face)
            blur_face = self.mean_blur_convolution_face_only(face=face, blur_face=blur_face, radius=self.radius)
            frame[top:bottom+1, left:right+1, :] = blur_face
        return frame
    def mean_blur_some_frame(self, frames, list_locations, queue, idx):
        '''
        Apply the blurring effect to all faces within multiple frames.
        '''
        frames_update = []
        for i in range(len(frames)):
          frame = frames[i]
          locations_=list_locations[i]
          frame_update = self.mean_blur_one_frame(frame, locations_)
          frames_update.append(frame_update)
        queue.put((idx,frames_update))
        # return frames_update

    def filter_on_video_mult(self, filter_func, radius=10, face_detection_model = 'mtcnn'):
        '''
        Divide the entire video into several parts, and apply blurring in parallel.
        '''
        self.des_arr = self.video_array.copy()
        frame_size = self.video_array.shape[0]
        self.locations = self.get_face_locations(face_detection_model)
        self.radius = radius

        frames_portions = list(np.array_split(self.des_arr, self.N))
        locations_portions  = list(np.array_split(self.locations, self.N))
        q = Queue()
        jobs = []
        rets = []

        lock = Lock()


        for i in range(self.N):
            p = Process(target=self.mean_blur_some_frame, args=(frames_portions[i],locations_portions[i], q, i))
            p.Daemon = True
            jobs.append(p)
            p.start()
            
        for p in jobs:
            ret = q.get() # will block
            rets.append(ret)
        for p in jobs:
            p.join()
            
        # sort them by the index to restore order
        rets.sort(key=lambda x:x[0])

        rets = np.array([ret[1] for ret in rets])
        
        self.des_arr = np.concatenate(rets).astype(np.uint8)
        print(self.des_arr.shape)
            



# We listed some test cases that are described in the report below.

In [6]:
# CPU Base: use CPU for both face detection (Pytorch) and naive blurring
def case1():
  case_1 = video_transformer_base(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False)
  case_1.main_transformation("mtcnn", case_1.mean_blur)
  case_1.write_to_video("cpu_base_peds.mp4")

In [7]:
%lprun -f video_transformer_base.main_transformation case1()

```
Timer unit: 1e-06 s

Total time: 178.407 s
File: <ipython-input-4-6509e3bc97f5>
Function: main_transformation at line 54

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    54                                               def main_transformation(self, 
    55                                                                       face_detection_model, 
    56                                                                       filter_effect):
    57                                                   '''
    58                                                   For each frame, do:
    59                                                   1. detect the face;
    60                                                   2. apply the filter;
    61                                                   3. save the processed frame.
    62                                                   '''                    
    63         1      21666.0  21666.0      0.0          video_capture = cv2.VideoCapture(self.video_path)
    64         1          3.0      3.0      0.0          frame_count = 0
    65         1          2.0      2.0      0.0          output_frames = []
    66       106        565.0      5.3      0.0          while video_capture.isOpened():    
    67                                                       # Grab a single frame of video
    68       106     631081.0   5953.6      0.4              ret, frame = video_capture.read()
    69                                           
    70                                                       # Bail out when the video file ends
    71       106        365.0      3.4      0.0              if not ret:
    72         1       1817.0   1817.0      0.0                  video_capture.release()
    73         1          2.0      2.0      0.0                  break
    74                                                       
    75       105        186.0      1.8      0.0              frame_count += 1
    76                                           
    77                                                       # detect faces
    78       105        154.0      1.5      0.0              if face_detection_model != "mtcnn":
    79                                                           face_locations = self.face_detection(frame, 
    80                                                                                                model=face_detection_model)
    81                                                       else:
    82       105  135018191.0 1285887.5     75.7                  face_locations = self.face_detection_mtcnn(frame)
    83                                           
    84                                                       # add effect
    85       105   42536444.0 405109.0     23.8              after_effect_frame = filter_effect(frame, face_locations)
    86                                           
    87       105        351.0      3.3      0.0              if self.display and frame_count % 2 == 0:
    88                                                          # If faces were found, we will mark it on frame with blue dots
    89                                                           for face_location in face_locations:
    90                                                               top, right, bottom, left = face_location
    91                                                               cv2.rectangle(after_effect_frame,(left, top), (right, bottom), (0, 0, 255), 2)
    92                                                           plt.imshow(after_effect_frame)
    93                                                           plt.show()
    94                                                           clear_output(wait=True)
    95                                           
    96       105        184.0      1.8      0.0              output_frames.append(after_effect_frame)
    97         1          3.0      3.0      0.0          self.num_frames = frame_count
    98         1     196095.0 196095.0      0.1          self.des_arr = np.array(output_frames)
```

In [8]:
# GPU Base: use GPU for face detection (Pytorch), but CPU for naive blurring
def case2():
  case_2 = video_transformer_base(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
  case_2.main_transformation("mtcnn", case_2.mean_blur)
  case_2.write_to_video("gpu_base_peds.mp4")

In [9]:
%lprun -f video_transformer_base.main_transformation case2()

```
Timer unit: 1e-06 s

Total time: 80.4681 s
File: <ipython-input-4-6509e3bc97f5>
Function: main_transformation at line 54

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    54                                               def main_transformation(self, 
    55                                                                       face_detection_model, 
    56                                                                       filter_effect):
    57                                                   '''
    58                                                   For each frame, do:
    59                                                   1. detect the face;
    60                                                   2. apply the filter;
    61                                                   3. save the processed frame.
    62                                                   '''                    
    63         1      21063.0  21063.0      0.0          video_capture = cv2.VideoCapture(self.video_path)
    64         1          2.0      2.0      0.0          frame_count = 0
    65         1          1.0      1.0      0.0          output_frames = []
    66       106        583.0      5.5      0.0          while video_capture.isOpened():    
    67                                                       # Grab a single frame of video
    68       106     786890.0   7423.5      1.0              ret, frame = video_capture.read()
    69                                           
    70                                                       # Bail out when the video file ends
    71       106        380.0      3.6      0.0              if not ret:
    72         1        871.0    871.0      0.0                  video_capture.release()
    73         1          2.0      2.0      0.0                  break
    74                                                       
    75       105        179.0      1.7      0.0              frame_count += 1
    76                                           
    77                                                       # detect faces
    78       105        176.0      1.7      0.0              if face_detection_model != "mtcnn":
    79                                                           face_locations = self.face_detection(frame, 
    80                                                                                                model=face_detection_model)
    81                                                       else:
    82       105   36826700.0 350730.5     45.8                  face_locations = self.face_detection_mtcnn(frame)
    83                                           
    84                                                       # add effect
    85       105   42636175.0 406058.8     53.0              after_effect_frame = filter_effect(frame, face_locations)
    86                                           
    87       105        355.0      3.4      0.0              if self.display and frame_count % 2 == 0:
    88                                                          # If faces were found, we will mark it on frame with blue dots
    89                                                           for face_location in face_locations:
    90                                                               top, right, bottom, left = face_location
    91                                                               cv2.rectangle(after_effect_frame,(left, top), (right, bottom), (0, 0, 255), 2)
    92                                                           plt.imshow(after_effect_frame)
    93                                                           plt.show()
    94                                                           clear_output(wait=True)
    95                                           
    96       105        183.0      1.7      0.0              output_frames.append(after_effect_frame)
    97         1          2.0      2.0      0.0          self.num_frames = frame_count
    98         1     194552.0 194552.0      0.2          self.des_arr = np.array(output_frames)
```

In [15]:
# GPU + Convolution: use GPU for face detection (Pytorch), and CPU for blurring effects using Scipy convolution
def case3():
  case_3 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
  case_3.filter_on_video_face_only(case_3.mean_blur_convolution_face_only)
  case_3.write_to_video("peds_gpu_parallel_conv.mp4")

In [16]:
%lprun -f video_transformer_parallel.filter_on_video_face_only case3()

```
Timer unit: 1e-06 s

Total time: 36.3854 s
File: <ipython-input-4-6509e3bc97f5>
Function: filter_on_video_face_only at line 241

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   241                                               def filter_on_video_face_only(self, filter_func, radius=10, face_detection_model = 'mtcnn'):
   242                                                   '''
   243                                                   Filter on the entire video, but the filter function only take in the face regions 
   244                                                   instead of the entire frame to reduce data size.
   245                                                   '''
   246         1          6.0      6.0      0.0          k = 1 / (2*radius+1)**2
   247         1     132984.0 132984.0      0.4          self.des_arr = self.video_array.copy()
   248         1          7.0      7.0      0.0          frame_size = self.video_array.shape[0]
   249         1   24130893.0 24130893.0     66.3          self.locations = self.get_face_locations(face_detection_model)
   250                                           
   251       106        108.0      1.0      0.0          for i in range(frame_size):
   252       752       1656.0      2.2      0.0              for location in self.locations[i]:
   253       647       3509.0      5.4      0.0                  top, right, bottom, left = location
   254       647       1770.0      2.7      0.0                  face = self.des_arr[i, top:bottom+1, left:right+1, :]
   255       647       3153.0      4.9      0.0                  blur_face = np.empty_like(face)
   256       647   12102849.0  18706.1     33.3                  blur_face = filter_func(face=face, blur_face=blur_face, radius=radius)
   257       647       8510.0     13.2      0.0                  self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face

check
39scompleted at 9:25 PM
```

In [17]:
@cuda.jit
def img_blur_cuda(img, des_img, k, radius):
    '''
    numba cuda version of blurring algorithm
    '''
    i, j = cuda.grid(2)

    rows, columns, channel = img.shape
    if i >= rows or j >= columns:
        return

    ra = rows - radius
    ca = columns - radius
    if i < radius or j < radius or i >= ra or j >= ca:
        des_img[i, j, 0] = img[i, j, 0]
        des_img[i, j, 1] = img[i, j, 1]
        des_img[i, j, 2] = img[i, j, 2]
        return

    r = 0
    g = 0
    b = 0
    for x in range(-radius, radius + 1):
        for y in range(-radius, radius + 1):
            i_x = i + x
            j_y = j + y
            r += img[i_x, j_y, 0] * k
            g += img[i_x, j_y, 1] * k
            b += img[i_x, j_y, 2] * k
    des_img[i, j, 0] = r
    des_img[i, j, 1] = g
    des_img[i, j, 2] = b

In [19]:
# GPU + CUDA: use GPU for face detection (Pytorch), and CUDA for blurring effects
def case4():
  case_4 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
  case_4.filter_on_video_cuda(img_blur_cuda)
  case_4.write_to_video("peds_cuda_gpu.mp4")

In [20]:
%lprun -f video_transformer_parallel.filter_on_video_cuda case4()

```Timer unit: 1e-06 s

Total time: 28.8781 s
File: <ipython-input-4-6509e3bc97f5>
Function: filter_on_video_cuda at line 204

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   204                                               def filter_on_video_cuda(self, img_blur_cuda, radius=10, face_detection_model = 'mtcnn'):
   205                                                   '''
   206                                                   Filter on entire video using CUDA for blurring. It specifies block size and controls bounds.
   207                                                   '''
   208         1          4.0      4.0      0.0          blocksize = (32,32)
   209         1          5.0      5.0      0.0          k = 1 / (2*radius+1)**2
   210         1     134278.0 134278.0      0.5          self.des_arr = self.video_array.copy()
   211         1          7.0      7.0      0.0          frame_size = self.video_array.shape[0]
   212         1   25281999.0 25281999.0     87.5          self.locations = self.get_face_locations(face_detection_model)
   213         1      18775.0  18775.0      0.1          np.save("frame_test.out", self.des_arr[20])
   214         1       3910.0   3910.0      0.0          np.save("locations_tests.out", self.locations[20])
   215       106        123.0      1.2      0.0          for i in range(frame_size):
   216       752       1745.0      2.3      0.0              for location in self.locations[i]:
   217       647       3491.0      5.4      0.0                  top, right, bottom, left = location
   218       647      11169.0     17.3      0.0                  face = np.ascontiguousarray(self.des_arr[i, top:bottom+1, left:right+1, :])
   219       647       1285.0      2.0      0.0                  gridsize = (face.shape[0]//blocksize[0]+1, face.shape[1]//blocksize[1]+1)
   220       647       3653.0      5.6      0.0                  blur_face = np.empty_like(face)
   221       647    3406597.0   5265.2     11.8                  img_blur_cuda[gridsize, blocksize](face, blur_face, k, radius)
   222       647      11079.0     17.1      0.0                  self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face
   ```

In [21]:
%load_ext Cython

In [22]:
%%cython
import cv2
from scipy.signal import convolve2d
import numpy as np
cimport numpy as np
ctypedef np.npy_intp SIZE_t

def mean_blur_convolution_cython(np.ndarray[np.uint8_t, ndim=3] image, 
                                 np.ndarray[np.uint8_t, ndim=3] des_img, 
                                 np.ndarray locations, 
                                 double radius):
    '''
    Utilized Scipy convolution to perform blurring effect.
    '''
    if len(locations) == 0:
          # print("No faces")
          return
    cdef int n_neighbor = np.int32((radius)*2+1)
    cdef int height = des_img.shape[0]
    cdef int width = des_img.shape[1]
    cdef np.ndarray[int, ndim = 2]kernel = np.ones((n_neighbor, n_neighbor),dtype=np.int32)
    cdef int t_, b_, l_, r_, bound_top, bound_bottom, bound_left, bound_right

    for (top, right, bottom, left) in locations:
        t_ = np.int32(max(top+radius,0))
        b_ = np.int32(min(bottom-radius, height))
        l_ = np.int32(max(left+radius,0))
        r_ = np.int32(min(right-radius, width))

        bound_top = np.int32(t_-radius)
        bound_bottom = np.int32(b_+radius+1)
        bound_left = np.int32(l_-radius)
        bound_right = np.int32(r_+radius+1)

        if t_ >= b_ or l_ >= r_:
            continue
        
        sample_area = image[bound_top:bound_bottom, 
                            bound_left:bound_right,:]
        
        # np.save("face.out", sample_area)
        red = convolve2d(sample_area[:,:,np.int32(0)], kernel, 'same')
        green = convolve2d(sample_area[:,:,np.int32(1)], kernel, 'same')
        blue = convolve2d(sample_area[:,:,np.int32(2)], kernel, 'same')

        convol_area = np.stack([red, green, blue], axis=2)

        des_img[bound_top:bound_bottom, 
               bound_left:bound_right,:] = (convol_area / (n_neighbor**2)).astype(np.uint8)
        cv2.rectangle(des_img, (left, top), (right, bottom), (0, 0, 255), 2)  

In [31]:
# GPU + Cythonized Convolution: use GPU for face detection (Pytorch), and use Cython to compile the Scipy Convolution solution
def case5():
  case_5 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
  case_5.filter_on_video(mean_blur_convolution_cython)
  case_5.write_to_video("peds_gpu_parallel_cython.mp4")

In [32]:
%lprun -f video_transformer_parallel.filter_on_video case5()

```
Timer unit: 1e-06 s

Total time: 36.4354 s
File: <ipython-input-4-6509e3bc97f5>
Function: filter_on_video at line 193

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   193                                               def filter_on_video(self, filter_func, face_detection_model = 'mtcnn', radius = 10):
   194                                                   '''
   195                                                   Apply filter on the video.
   196                                                   '''
   197         1     133892.0 133892.0      0.4          self.des_arr = self.video_array.copy()
   198         1          4.0      4.0      0.0          frame_size = self.video_array.shape[0]
   199         1   24316991.0 24316991.0     66.7          self.locations = self.get_face_locations(face_detection_model)
   200         1          3.0      3.0      0.0          radius_list = [radius] * frame_size
   201                                           
   202         1   11984474.0 11984474.0     32.9          list(map(filter_func, self.video_array, self.des_arr, self.locations, radius_list))
   ```

In [27]:
# GPU + Multiprocessing Convolution: use GPU for face detection (Pytorch), and use multiprocessing to blur the images
def case6():
  case_6 = video_transformer_multiprocessing(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           N=4,
                           display=False,
                           device='gpu')
  case_6.filter_on_video_mult(case_6.mean_blur_convolution_face_only)
  case_6.write_to_video("peds_gpu_conv_face_only_mult.mp4")

In [28]:
%lprun -f video_transformer_multiprocessing.filter_on_video_mult case6()

(105, 1080, 1920, 3)


```
Timer unit: 1e-06 s

Total time: 39.5816 s
File: <ipython-input-5-c6cf6f63823e>
Function: filter_on_video_mult at line 47

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    47                                               def filter_on_video_mult(self, filter_func, radius=10, face_detection_model = 'mtcnn'):
    48                                                   '''
    49                                                   Divide the entire video into several parts, and apply blurring in parallel.
    50                                                   '''
    51         1     139598.0 139598.0      0.4          self.des_arr = self.video_array.copy()
    52         1          7.0      7.0      0.0          frame_size = self.video_array.shape[0]
    53         1   24297241.0 24297241.0     61.4          self.locations = self.get_face_locations(face_detection_model)
    54         1          4.0      4.0      0.0          self.radius = radius
    55                                           
    56         1         85.0     85.0      0.0          frames_portions = list(np.array_split(self.des_arr, self.N))
    57         1         78.0     78.0      0.0          locations_portions  = list(np.array_split(self.locations, self.N))
    58         1        469.0    469.0      0.0          q = Queue()
    59         1          1.0      1.0      0.0          jobs = []
    60         1          0.0      0.0      0.0          rets = []
    61                                           
    62         1         82.0     82.0      0.0          lock = Lock()
    63                                           
    64                                           
    65         5         55.0     11.0      0.0          for i in range(self.N):
    66         4        756.0    189.0      0.0              p = Process(target=self.mean_blur_some_frame, args=(frames_portions[i],locations_portions[i], q, i))
    67         4         24.0      6.0      0.0              p.Daemon = True
    68         4         37.0      9.2      0.0              jobs.append(p)
    69         4     153934.0  38483.5      0.4              p.start()
    70                                                       
    71         5         19.0      3.8      0.0          for p in jobs:
    72         4   14532106.0 3633026.5     36.7              ret = q.get() # will block
    73         4         27.0      6.8      0.0              rets.append(ret)
    74         5          8.0      1.6      0.0          for p in jobs:
    75         4       7610.0   1902.5      0.0              p.join()
    76                                                       
    77                                                   # sort them by the index to restore order
    78         1         13.0     13.0      0.0          rets.sort(key=lambda x:x[0])
    79                                           
    80         1         79.0     79.0      0.0          rets = np.array([ret[1] for ret in rets])
    81                                                   
    82         1     449159.0 449159.0      1.1          self.des_arr = np.concatenate(rets).astype(np.uint8)
    83         1        250.0    250.0      0.0          print(self.des_arr.shape)
```

In [None]:
# GPU + CV2 (Benchmark): use GPU for face detection (Pytorch), and use cv2.blur() for blurring 
def case7():
  case_cv2 = video_transformer_parallel(path = path,
                           save_path = os.path.join(path, "data", "frames"),
                           file_name = 'peds.mp4',
                           display=False,
                           device='gpu')
  case_cv2.filter_on_video_cv2(cv2.blur)
  case_cv2.write_to_video("peds_gpu_cv2.mp4")


In [30]:
%lprun -f video_transformer_parallel.filter_on_video_cv2 case7()

```
Timer unit: 1e-06 s

Total time: 24.5038 s
File: <ipython-input-4-6509e3bc97f5>
Function: filter_on_video_cv2 at line 224

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   224                                               def filter_on_video_cv2(self, cv2_blur, radius=10, face_detection_model = 'mtcnn'):
   225                                                   '''
   226                                                   Filter on entire video using CV2.blur.
   227                                                   '''
   228         1          7.0      7.0      0.0          k = 1 / (2*radius+1)**2
   229         1     142281.0 142281.0      0.6          self.des_arr = self.video_array.copy()
   230         1          5.0      5.0      0.0          frame_size = self.video_array.shape[0]
   231         1   24314501.0 24314501.0     99.2          self.locations = self.get_face_locations(face_detection_model)
   232                                                   
   233       106         62.0      0.6      0.0          for i in range(frame_size):
   234       752        715.0      1.0      0.0              for location in self.locations[i]:
   235       647       1229.0      1.9      0.0                  top, right, bottom, left = location
   236       647       1207.0      1.9      0.0                  face = self.des_arr[i, top:bottom+1, left:right+1, :]
   237       647       1559.0      2.4      0.0                  blur_face = np.empty_like(face)
   238       647      39785.0     61.5      0.2                  cv2_blur(src=face, dst=blur_face, ksize=(radius,radius))
   239       647       2439.0      3.8      0.0                  self.des_arr[i, top:bottom+1, left:right+1, :] = blur_face
```