# Video Conversion from RGB to YUV

# Import Libraries

In [1]:
import cv2
import numpy as np

# Function to Convert a video into frames
Since any video is a series of images, we need to parallelise the process of converting these individual images from RGB to YUV. 
The variable buf has all the numpy arrays of the individual frames of the video

In [2]:
def convert_to_frames(video):
    """Decode every frame of a video (or GIF) into one uint8 numpy array.

    Parameters
    ----------
    video : str
        Path of the file handed to ``cv2.VideoCapture``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frames_read, height, width, 3)`` and dtype uint8.
        ``frames_read`` can be smaller than the frame count reported by the
        container, because that count is only an estimate for some formats.

    Raises
    ------
    IOError
        If the file cannot be opened.
    """
    cap = cv2.VideoCapture(video)
    try:
        if not cap.isOpened():
            raise IOError("Could not open video: " + str(video))

        fps = cap.get(cv2.CAP_PROP_FPS)
        print("Video frame rate:",fps)
        # Clamp at zero: a negative reported count would make np.empty fail.
        frame_count = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        buf = np.empty((frame_count, frame_height, frame_width, 3), np.dtype('uint8'))

        frames_read = 0
        while frames_read < frame_count:
            ok, frame = cap.read()
            # A failed read returns no image; stop instead of storing it.
            if not ok:
                break
            buf[frames_read] = frame
            frames_read += 1
    finally:
        # Release the capture handle even if decoding raised.
        cap.release()

    # Drop the uninitialised tail left over when fewer frames were decoded
    # than the container advertised.
    return buf[:frames_read]

Buffered Frames holds the same frames of the videos just like the variable buf.

GIF files behave like video files with a lower frame rate. To process a video file instead, replace `eg.gif` with the name of the video file.

In [3]:
# Clip to decode; any GIF or video file that OpenCV can open may be used here.
source_clip = "eg.gif"
buffered_frames = convert_to_frames(source_clip)

Video frame rate: 20.0


Let's check what the individual frames look like and how many frames there are in this video.

In [28]:
# Report how many frames were decoded, then preview a few of them.
total_frames = len(buffered_frames)
print("Number of Frames in this video are::", total_frames)

# (window title, frame index) pairs: first, tenth and last frame.
previews = (
    ('Frame 1', 0),
    ('Frame 10', 9),
    ('Last frame', total_frames - 1),
)
for window_title, frame_index in previews:
    cv2.imshow(window_title, buffered_frames[frame_index])

# Block until a key is pressed, then close every preview window.
cv2.waitKey(0)
cv2.destroyAllWindows()


Number of Frames in this video are:: 57


# Converting the individual images from RGB to YUV 
Each frame is converted into its Y, U and V components, where Y is the black-and-white (luma) signal used by early television and U and V carry the colour information.
The original frame and its converted version are stacked vertically into a single image.
The results are stored in the `Converted` folder.

#### The cell below uses the CPU in the most efficient manner to convert images from RGB to YUV using cv2 library but does not use the GPU

In [None]:
def rgb_to_yuv(frame,frame_no):
    """Convert one frame to YUV with OpenCV and save it stacked under the original.

    Parameters
    ----------
    frame : numpy.ndarray
        Image passed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2YUV``.
    frame_no : int
        Number used in the output file name ``Converted/frame <frame_no>.jpg``.

    Raises
    ------
    IOError
        If the stacked image could not be written.
    """
    import os  # local import so this cell does not depend on another cell's imports

    img_yuv = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV)

    # Original on top, converted frame below.
    result = np.vstack([frame, img_yuv])

    # cv2.imwrite reports failure only through its return value, so make sure
    # the target folder exists and check the result explicitly.
    os.makedirs("Converted", exist_ok=True)
    out_path = "Converted/frame "+str(frame_no)+".jpg"
    if not cv2.imwrite(out_path, result):
        raise IOError("Could not write " + out_path)

#### The below cell does the same work as the above cell but using matrix multiplication on CUDA kernel

In [9]:
from __future__ import division
from numba import cuda
import numpy
import math

# CUDA kernel
@cuda.jit
def matmul(A, B, C):
    """Compute the matrix product C = A * B, writing the result into C.
    """
    out_row, out_col = cuda.grid(2)
    # Positions outside the bounds of C have nothing to compute.
    if out_row >= C.shape[0] or out_col >= C.shape[1]:
        return
    acc = 0.
    for inner in range(A.shape[1]):
        acc += A[out_row, inner] * B[inner, out_col]
    C[out_row, out_col] = acc
        

def rgb2yuv_GPU(frame,frame_no):
    """Convert one frame to YUV with the CUDA ``matmul`` kernel and save it.

    The frame is flattened to one pixel per row, multiplied by a 3x3 colour
    matrix on the GPU, offset by 128 on the two chroma channels, and written
    to ``Converted/frame <frame_no>.jpg`` stacked under the original frame.

    Parameters
    ----------
    frame : numpy.ndarray
        Image of shape ``(height, width, 3)``. Channels are expected in
        OpenCV's B, G, R order, matching the ``COLOR_BGR2YUV`` conversion
        used by ``rgb_to_yuv`` on the same frames.
    frame_no : int
        Number used in the output file name.

    Raises
    ------
    IOError
        If the stacked image could not be written.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Host code

    # One pixel per row: (height * width, 3).
    A = frame.astype("float32")
    A = A.reshape((frame.shape[0] * frame.shape[1], 3))

    # Colour matrix, one output channel (Y, U, V) per column. Rows follow the
    # B, G, R order of the input so that Y = 0.299 R + 0.587 G + 0.114 B.
    B =  np.array([[ 0.11400,  0.50000, -0.08131],
                   [ 0.58700, -0.33126, -0.41869],
                   [ 0.29900, -0.16874,  0.50000]])
    B = B.astype("float32")
    dimensions_A = A.shape

    # Copy the arrays to the device
    A_global_mem = cuda.to_device(A)
    B_global_mem = cuda.to_device(B)

    # Allocate memory on the device for the result (same shape as A)
    C_global_mem = cuda.device_array(dimensions_A)

    # Configure the blocks so the grid covers every element of the result
    threadsperblock = (16, 16)
    blockspergrid_x = int(math.ceil(A.shape[0] / threadsperblock[0]))
    blockspergrid_y = int(math.ceil(B.shape[1] / threadsperblock[1]))
    blockspergrid = (blockspergrid_x, blockspergrid_y)

    # Start the kernel
    matmul[blockspergrid, threadsperblock](A_global_mem, B_global_mem, C_global_mem)

    # Copy the result back to the host
    expected = C_global_mem.copy_to_host()

    img_yuv = expected.reshape(frame.shape)
    # Shift the signed chroma channels into the unsigned 8-bit range.
    img_yuv[:,:,1:]+=128.0
    # Round to nearest and clip before the cast; a bare astype would truncate
    # and is not well defined for values outside 0..255.
    img_yuv = np.clip(np.rint(img_yuv), 0, 255).astype("uint8")

    # Original on top, converted frame below.
    result = np.vstack([frame, img_yuv])

    # cv2.imwrite reports failure only through its return value.
    os.makedirs("Converted", exist_ok=True)
    out_path = "Converted/frame "+str(frame_no)+".jpg"
    if not cv2.imwrite(out_path, result):
        raise IOError("Could not write " + out_path)

#### The below cell does the same work as the above cell but using matrix multiplication function in numpy library on the CPU 

In [6]:
def rgb2yuv_CPU(frame,frame_no):
    """Convert one frame to YUV with a numpy matrix product and save it.

    The frame is flattened to one pixel per row, multiplied by a 3x3 colour
    matrix, offset by 128 on the two chroma channels, and written to
    ``Converted/frame <frame_no>.jpg`` stacked under the original frame.

    Parameters
    ----------
    frame : numpy.ndarray
        Image of shape ``(height, width, 3)``. Channels are expected in
        OpenCV's B, G, R order, matching the ``COLOR_BGR2YUV`` conversion
        used by ``rgb_to_yuv`` on the same frames.
    frame_no : int
        Number used in the output file name.

    Raises
    ------
    IOError
        If the stacked image could not be written.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Colour matrix, one output channel (Y, U, V) per column. Rows follow the
    # B, G, R order of the input so that Y = 0.299 R + 0.587 G + 0.114 B.
    m =  np.array([[ 0.11400,  0.50000, -0.08131],
                   [ 0.58700, -0.33126, -0.41869],
                   [ 0.29900, -0.16874,  0.50000]])
    yuv_filter = m.astype("float32")

    # One pixel per row: (height * width, 3).
    a = frame.astype("float32")
    a = a.reshape((frame.shape[0] * frame.shape[1], 3))

    expected = np.matmul(a, yuv_filter)

    img_yuv = expected.reshape(frame.shape)
    # Shift the signed chroma channels into the unsigned 8-bit range.
    img_yuv[:,:,1:]+=128.0
    # Round to nearest and clip before the cast; a bare astype would truncate
    # and is not well defined for values outside 0..255.
    img_yuv = np.clip(np.rint(img_yuv), 0, 255).astype("uint8")

    # Original on top, converted frame below.
    result = np.vstack([frame, img_yuv])

    # cv2.imwrite reports failure only through its return value.
    os.makedirs("Converted", exist_ok=True)
    out_path = "Converted/frame "+str(frame_no)+".jpg"
    if not cv2.imwrite(out_path, result):
        raise IOError("Could not write " + out_path)

# Serial Execution of RGB to YUV conversion
The above functions are executed serially: the conversion of the next image only starts after the previous one has finished.
Warning: Please do not run this program unless you have more than 8 GB of RAM.

### CPU EXECUTION 

In [22]:
%%time
for i in range(0,len(buffered_frames)):
    rgb2yuv_CPU(buffered_frames[i],i+1)


Wall time: 688 ms


### GPU EXECUTION

In [23]:
%%time
for i in range(0,len(buffered_frames)):
    rgb2yuv_GPU(buffered_frames[i],i+1)

Wall time: 930 ms


# Parallel Execution of RGB to YUV conversion
The same function now runs on threads and produces the same results as the serial version, but is expected to finish sooner than serial execution.
Why use threading?
Because each frame is an independent image from the video, every frame can be handed to its own thread and converted concurrently.

In [29]:
from threading import Thread

### CPU EXECUTION with THREADING

In [21]:
%%time
for i in range(0,len(buffered_frames)):
    Thread(target = rgb2yuv_CPU, args=[buffered_frames[i], i+1 ]).start()


Wall time: 198 ms


### GPU EXECUTION with THREADING 

In [30]:
%%time
for i in range(0,len(buffered_frames)):
    Thread(target = rgb2yuv_GPU, args=[buffered_frames[i], i+1 ]).start()

Wall time: 38 ms


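# Results: in the recorded runs the threaded GPU execution reported the lowest wall time (38 ms), while in serial execution the CPU (688 ms) was faster than the GPU (930 ms). The threaded timings are only comparable when the timed cell waits for all threads to finish.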