# cv.cudacodec

In [1]:
import cv2 as cv
import os
import time
import numpy as np

## Transcoding Example

Quick example comparing GPU transcoding with `cv.cudacodec` against CPU transcoding with plain `cv` (`cv.VideoCapture`/`cv.VideoWriter`) on Ubuntu, using the Python wheel from https://github.com/cudawarped/opencv-python-cuda-wheels/releases/tag/4.6.0.20221102.

GPU: Mobile RTX 3070 Ti (5th gen decoder & 7th gen encoder)

CPU: i7-12700H

The benchmark results are not comparable because CPU hardware decoding was not available for the chosen codecs.

In [2]:
# Directory holding the sample clips. Defaults to the original author's
# location; set the CUDACODEC_MEDIA_DIR environment variable to run the
# notebook against media stored elsewhere without editing this cell.
MEDIA_DIR = os.environ.get('CUDACODEC_MEDIA_DIR', '/home/b/media')

# 4K H.264 source clip that the first transcode reads.
vid_path_in_4k = os.path.join(MEDIA_DIR, 'jellyfish-120-mbps-4k-uhd-h264.mkv')
# 1080p HEVC clip: written by the 4K conversion, then read by both benchmarks.
vid_path_in_out_1080p = os.path.join(MEDIA_DIR, 'jelly_1080p.hevc')
# Output of the cv.cudacodec (GPU) 1080p benchmark.
vid_path_out_1080p = os.path.join(MEDIA_DIR, 'jelly.h264')
# Output of the cv.VideoWriter (CPU) 1080p benchmark.
vid_path_out_1080p_mp4 = os.path.join(MEDIA_DIR, 'jelly.mp4')

In [3]:
# Record the OpenCV build configuration (version, platform, CPU/HW features)
# so the benchmark numbers below can be tied to a specific build.
print(cv.getBuildInformation())


  Version control:               4.6.0-504-gee9137f176

  Extra modules:
    Location (extra):            /home/b/repos/opencv/opencv-python/opencv_contrib/modules
    Version control (extra):     4.6.0-106-g9d84eaed

  Platform:
    Timestamp:                   2022-11-02T16:24:13Z
    Host:                        Linux 5.10.16.3-microsoft-standard-WSL2 x86_64
    CMake:                       3.24.1
    CMake generator:             Ninja
    CMake build tool:            /usr/bin/ninja
    Configuration:               Release

  CPU/HW features:
    Baseline:                    SSE SSE2 SSE3
      requested:                 SSE3
    Dispatched code generation:  SSE4_1 SSE4_2 FP16 AVX AVX2 AVX512_SKX
      requested:                 SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX
      SSE4_1 (16 files):         + SSSE3 SSE4_1
      SSE4_2 (1 files):          + SSSE3 SSE4_1 POPCNT SSE4_2
      FP16 (0 files):            + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX
      AVX (4 files):             + SS

### GPU

In [4]:
def transcode(vid_path_in, vid_path_out, codec = cv.cudacodec.H264, params = cv.cudacodec_VideoReaderInitParams()):
    """Transcode a video with cv.cudacodec and measure the throughput.

    Frames are pulled from a cudacodec VideoReader into a pre-allocated
    ``cv.cuda.GpuMat`` (BGR, 8-bit, 3 channels) and handed directly to a
    cudacodec VideoWriter.

    Args:
        vid_path_in: Path of the video to decode.
        vid_path_out: Path the encoded video is written to.
        codec: ``cv.cudacodec`` codec constant passed to ``createVideoWriter``.
        params: ``VideoReaderInitParams`` passed to ``createVideoReader``. If
            ``params.targetSz`` is not ``(0, 0)`` it is used as the output
            (width, height); otherwise the size reported by the reader's
            format is used. The default instance is built once, at function
            definition time, and is only read here.

    Returns:
        Tuple ``(fps, n_frames)``: frames written per second and the number of
        frames written. The timed region covers the read/write loop and
        ``writer.release()``; creating the reader and writer is not timed.
    """
    reader = cv.cudacodec.createVideoReader(vid_path_in, params=params)
    reader.set(cv.cudacodec.COLOR_FORMAT_BGR)
    # Named `fmt` so the builtin `format` is not shadowed.
    fmt = reader.format()
    if params.targetSz != (0, 0):
        w, h = params.targetSz
    else:
        w, h = (fmt.width, fmt.height)

    frame = cv.cuda.GpuMat(h, w, cv.CV_8UC3)
    writer = cv.cudacodec.createVideoWriter(vid_path_out, [w, h], codec)
    n_frames = 0
    # perf_counter is monotonic and high resolution, unlike time.time(), so the
    # measured interval cannot be distorted by system clock adjustments.
    start = time.perf_counter()
    try:
        # Keep the frame object returned by the binding instead of assuming the
        # pre-allocated GpuMat was the one that got filled.
        ret, frame = reader.nextFrame(frame)
        while ret:
            n_frames += 1
            writer.write(frame)
            ret, frame = reader.nextFrame(frame)
    finally:
        # Always finalise the output, even if decoding or encoding raised.
        writer.release()
    elapsed = time.perf_counter() - start
    # Guard against a zero-length interval (e.g. nothing decoded on a coarse clock).
    fps = n_frames / elapsed if elapsed > 0 else 0.0
    return fps, n_frames

First convert 4K(h264) to 1080p(hevc) for benchmarking

In [5]:
params = cv.cudacodec_VideoReaderInitParams()
params.targetSz = (1920,1080)
fps, n_frames = transcode(vid_path_in_4k,vid_path_in_out_1080p,cv.cudacodec.HEVC,params)
print(f'Transcoded {n_frames} frames from 4k(h264) to 1080p(hevc) at fps= {fps:.2f}')

Transcoded 900 frames from 4k(h264) to 1080p(hevc) at fps= 130.99


Benchmark 1080p transcoding - timings will be slightly optimistic because decoding begins as soon as the VideoReader is created, i.e. before the timer is started

In [6]:
# GPU benchmark: re-encode the 1080p HEVC clip produced above as H.264,
# keeping the 1920x1080 output size.
params = cv.cudacodec_VideoReaderInitParams()
params.targetSz = (1920, 1080)
fps, n_frames = transcode(
    vid_path_in=vid_path_in_out_1080p,
    vid_path_out=vid_path_out_1080p,
    codec=cv.cudacodec.H264,
    params=params,
)
print(f'Transcoded {n_frames} frames from 1080p(hevc) to 1080p(h264) at fps = {fps:.2f}')

Transcoded 900 frames from 1080p(hevc) to 1080p(h264) at fps = 544.81


### CPU

Hardware acceleration does not appear to be available for this codec

In [7]:
def transcode_cpu(vid_path_in, vid_path_out):
    """Transcode a video with cv.VideoCapture / cv.VideoWriter and measure the throughput.

    The input is opened with the FFmpeg backend and the output is encoded with
    the "avc1" fourcc at the frame rate and frame size reported by the capture.
    Both objects are created with ``VIDEO_ACCELERATION_ANY`` requested.

    Args:
        vid_path_in: Path of the video to decode.
        vid_path_out: Path the encoded video is written to.

    Returns:
        Tuple ``(fps, n_frames)``: frames written per second and the number of
        frames written. The timed region covers the read/write loop and
        ``writer.release()``; opening the capture and writer is not timed.
    """
    cap = cv.VideoCapture(vid_path_in, cv.CAP_FFMPEG,
                          (cv.CAP_PROP_HW_ACCELERATION, cv.VIDEO_ACCELERATION_ANY))
    try:
        fourcc = cv.VideoWriter_fourcc(*"avc1")
        fps = cap.get(cv.CAP_PROP_FPS)
        width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        writer = cv.VideoWriter(vid_path_out, fourcc, fps, (width, height),
                                (cv.VIDEOWRITER_PROP_HW_ACCELERATION, cv.VIDEO_ACCELERATION_ANY))
        # Pre-allocated buffer offered to read() so it can be reused per frame.
        frame = np.zeros((height, width, 3), dtype='uint8')
        n_frames = 0
        # perf_counter is monotonic and high resolution, unlike time.time().
        start = time.perf_counter()
        try:
            # Write the image that read() returns: this stays correct even if
            # the binding hands back a different array than the buffer passed
            # in, in which case the original buffer would still be all zeros.
            ret, frame = cap.read(frame)
            while ret:
                n_frames += 1
                writer.write(frame)
                ret, frame = cap.read(frame)
        finally:
            # Always finalise the output file, even if a read/write raised.
            writer.release()
        elapsed = time.perf_counter() - start
    finally:
        # The capture was never released before; free the decoder explicitly.
        cap.release()
    # Guard against a zero-length interval (e.g. the input could not be read).
    throughput = n_frames / elapsed if elapsed > 0 else 0.0
    return throughput, n_frames

Benchmarking 1080p transcoding without hardware acceleration

In [8]:
# CPU benchmark: same 1080p HEVC input as the GPU run, written to an .mp4 file.
fps, n_frames = transcode_cpu(
    vid_path_in=vid_path_in_out_1080p,
    vid_path_out=vid_path_out_1080p_mp4,
)
print(f'Transcoded {n_frames} frames from 1080p(hevc) to 1080p(h264) at fps = {fps:.2f}')

Transcoded 900 frames from 1080p(hevc) to 1080p(h264) at fps = 65.46
