In [94]:
import cv2
import numpy as np
import time

In [110]:
# Show the version of the imported OpenCV module so the timings below can be tied to a build.
cv2.__version__

'4.5.0'

# OpenCV Resize

In [95]:
# Random 8-bit RGB-sized test image (2048 x 2048 x 3).
# np.random.random() yields floats in [0, 1); casting those to uint8 truncates
# every sample to 0 and produces an all-black image, so draw integers over the
# full 0..255 range instead.
img = np.random.randint(0, 256, size=(2048, 2048, 3), dtype=np.uint8)

## Without pre-alloc

In [96]:
# Target output size in pixels, shared by every resize benchmark below.
resize_width, resize_height = 1024, 1024
# Number of iterations each hand-rolled timing loop averages over.
loop_cnt = 100_000

### CPU

In [97]:
# CPU baseline: cv2.resize returns a freshly created output array on every call.
# perf_counter() is the monotonic, high-resolution clock intended for benchmarks;
# time.time() follows the wall clock and can jump if the system time is adjusted.
time_start = time.perf_counter()
for _ in range(loop_cnt):
    # dsize is (width, height); the original passed (height, width), which only
    # worked because both are 1024.
    resize_img = cv2.resize(img, (resize_width, resize_height), interpolation=cv2.INTER_LINEAR)
print('cpu time: {:.2f} us'.format((time.perf_counter() - time_start) * 1e6 / loop_cnt))

cpu time: 952.11 us


### GPU

In [98]:
# Create a GpuMat with no data yet; the timing loop in the next cell fills it via upload().
device_img = cv2.cuda_GpuMat()

#### Upload/Download for a single GPU operation

In [99]:
# Worst case for the GPU: every iteration pays for host->device upload, the
# resize itself, and device->host download of the result.
# perf_counter() is the monotonic, high-resolution clock intended for benchmarks.
time_start = time.perf_counter()
for _ in range(loop_cnt):
    device_img.upload(img)
    # dsize is (width, height); the original passed (height, width), which only
    # worked because both are 1024.
    resize_device_img = cv2.cuda.resize(device_img, (resize_width, resize_height), interpolation=cv2.INTER_LINEAR)
    resize_img = resize_device_img.download()
print('gpu time: {:.2f} us'.format((time.perf_counter() - time_start) * 1e6 / loop_cnt))

gpu time: 3926.30 us


#### Normal operation

In [100]:
# Build the GpuMat from the host image once, outside the timed loop
# (presumably this uploads the pixel data to the device - confirm).
device_img = cv2.cuda_GpuMat(img)

In [101]:
# GPU resize with the source already in a GpuMat; a new destination GpuMat is
# returned on every call.
# perf_counter() is the monotonic, high-resolution clock intended for benchmarks.
time_start = time.perf_counter()
for _ in range(loop_cnt):
    # dsize is (width, height); the original passed (height, width), which only
    # worked because both are 1024.
    resize_device_img = cv2.cuda.resize(device_img, (resize_width, resize_height), interpolation=cv2.INTER_LINEAR)
print('gpu time: {:.2f} us'.format((time.perf_counter() - time_start) * 1e6 / loop_cnt))

gpu time: 460.10 us


## With pre-alloc

### CPU

In [102]:
# Host-side output buffer, allocated once as (rows, cols, channels) so the timed
# loop can write into it instead of creating a new array per call.
resize_img = np.zeros(shape=(resize_height, resize_width, 3), dtype="uint8")

In [103]:
# CPU resize writing into the pre-allocated resize_img buffer.
# perf_counter() is the monotonic, high-resolution clock intended for benchmarks.
time_start = time.perf_counter()
for _ in range(loop_cnt):
    # dsize is (width, height); the original passed (height, width), which only
    # worked because both are 1024. dst is passed by keyword for clarity.
    cv2.resize(img, (resize_width, resize_height), dst=resize_img, interpolation=cv2.INTER_LINEAR)
print('cpu time: {:.2f} us'.format((time.perf_counter() - time_start) * 1e6 / loop_cnt))

cpu time: 390.59 us


### GPU

In [111]:
# Source image as a GpuMat, created once outside the timed loop.
device_img = cv2.cuda_GpuMat(img)
# Destination GpuMat built from the pre-allocated host buffer, so it already has
# the output size and type (presumably allocated on the device here - confirm).
resize_device_img = cv2.cuda_GpuMat(resize_img)

In [112]:
# GPU resize writing into the pre-allocated destination GpuMat.
# perf_counter() is the monotonic, high-resolution clock intended for benchmarks.
time_start = time.perf_counter()
for _ in range(loop_cnt):
    # dsize is (width, height); the original passed (height, width), which only
    # worked because both are 1024. dst is passed by keyword, matching the
    # %timeit cross-check further down.
    cv2.cuda.resize(device_img, (resize_width, resize_height), dst=resize_device_img, interpolation=cv2.INTER_LINEAR)
print('gpu time: {:.2f} us'.format((time.perf_counter() - time_start) * 1e6 / loop_cnt))

gpu time: 87.67 us


#### Check timer

In [106]:
# Cross-check the hand-rolled timing loop above with IPython's %timeit on the same call.
# NOTE(review): dsize is passed as (height, width); OpenCV expects (width, height) - harmless only while both are equal.
%timeit cv2.cuda.resize(device_img, (resize_height, resize_width),dst=resize_device_img,interpolation=cv2.INTER_LINEAR)

89.9 µs ± 1.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


#### Streams - unrealistic without a pause; check for GPU saturation

In [107]:
# Create an explicit CUDA stream for the resize call timed in the next cell.
# The module is imported as `cv2`; the original `cv.cuda_Stream()` raised
# NameError on a fresh kernel because `cv` is never defined.
stream = cv2.cuda_Stream()

In [108]:
# Same pre-allocated resize, but issued on an explicit stream.
# NOTE(review): with a non-default stream the call presumably returns before the GPU work finishes,
# so this likely measures enqueue cost only; wait on the stream (e.g. stream.waitForCompletion())
# inside the timed statement to measure the actual resize time - confirm.
%timeit cv2.cuda.resize(device_img, (resize_height, resize_width),dst=resize_device_img,interpolation=cv2.INTER_LINEAR, stream=stream)

44.3 µs ± 377 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
