In [19]:
from nvidia.dali import pipeline_def
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import numpy as np
import matplotlib.pyplot as plt
import math
import os.path

# test_data_root = os.environ["DALI_EXTRA_PATH"]
# db_folder = os.path.join(test_data_root, "db", "lmdb")

In [20]:
def random_transform():
    dst_cx, dst_cy = (200, 200)
    src_cx, src_cy = (200, 200)

    # This function uses homogeneous coordinates - hence, 3x3 matrix

    # translate output coordinates to center defined by (dst_cx, dst_cy)
    t1 = np.array([[1, 0, -dst_cx], [0, 1, -dst_cy], [0, 0, 1]])

    def u():
        return np.random.uniform(-0.5, 0.5)

    # apply a randomized affine transform - uniform scaling + some random
    # distortion
    m = np.array([[1 + u(), u(), 0], [u(), 1 + u(), 0], [0, 0, 1]])

    # translate input coordinates to center (src_cx, src_cy)
    t2 = np.array([[1, 0, src_cx], [0, 1, src_cy], [0, 0, 1]])

    # combine the transforms
    m = np.matmul(t2, np.matmul(m, t1))

    # remove the last row; it's not used by affine transform
    return m[0:2, 0:3].astype(np.float32)


np.random.seed(seed=123)

In [21]:
class ExternalInputCallable(object):
    def __call__(self, sample_info):
        with open("/home/qsvm/lib_ocr/experiment/vietocr_img_441026.jpg", 'rb') as f:
            file_bytes = f.read()
        
        image = np.frombuffer(file_bytes, dtype=np.uint8)
        return image

In [22]:
@pipeline_def(seed=12)
def example_pipeline():
    # This example uses external_source to provide warp matrices
    transform = fn.external_source(
        batch=False, source=random_transform, dtype=types.FLOAT
    )

    images = fn.external_source(
        source=ExternalInputCallable(),
        num_outputs=1,
        batch=False,
        parallel=True,
        dtype=[types.UINT8],
        prefetch_queue_depth=2,
    )

    # The decoder takes tensors containing raw files and outputs images
    # as 3D tensors with HWC layout
    images = fn.decoders.image(images)

    warped_gpu = fn.warp_affine(
        images,
        transform,  # pass the transform parameters through GPU memory
        size=(400, 400),  # specify the output size
        # fill_value,       # not specifying `fill_value`
        #  results in source coordinate clamping
        interp_type=types.INTERP_LINEAR,
    )  # use linear interpolation

    warped_cpu = fn.warp_affine(
        images,
        matrix=transform,  # pass the transform through a named input
        fill_value=200,
        size=(400, 400),  # specify the output size
        interp_type=types.INTERP_NN,
    )  # use nearest neighbor interpolation

    warped_keep_size = fn.warp_affine(
        images,
        transform,
        # size,        # keep the original canvas size
        interp_type=types.INTERP_LINEAR,
    )  # use linear interpolation
    return (
        transform,
        images,
        warped_gpu,
        warped_cpu,
        warped_keep_size,
    )

In [23]:
batch_size = 1
pipe = example_pipeline(batch_size=batch_size, num_threads=2, device_id=0)
pipe.build()

RuntimeError: Error when starting Python worker threads for DALI parallel External Source. Cannot fork a process when the CUDA has been initialized in the process. CUDA is initialized during ``Pipeline.build()``, or can be initialized by another library that interacts with CUDA, for example a DL framework creating CUDA tensors. If you are trying to build multiple pipelines that use Python workers, you will need to call ``start_py_workers`` method on all of them before calling ``build`` method of any pipeline to start Python workers before CUDA is initialized by ``build`` or other CUDA operation. Alternatively you can change Python workers starting method from ``fork`` to ``spawn`` (see DALI Pipeline's ``py_start_method`` option for details). 

In [24]:
pipe_out = pipe.run()

RuntimeError: Error when starting Python worker threads for DALI parallel External Source. Cannot fork a process when the CUDA has been initialized in the process. CUDA is initialized during ``Pipeline.build()``, or can be initialized by another library that interacts with CUDA, for example a DL framework creating CUDA tensors. If you are trying to build multiple pipelines that use Python workers, you will need to call ``start_py_workers`` method on all of them before calling ``build`` method of any pipeline to start Python workers before CUDA is initialized by ``build`` or other CUDA operation. Alternatively you can change Python workers starting method from ``fork`` to ``spawn`` (see DALI Pipeline's ``py_start_method`` option for details). 

In [None]:
n = 0  # change this value to see other images from the batch;
# it must be in 0..batch_size-1 range

# from synsets import imagenet_synsets
import matplotlib.gridspec as gridspec

len_outputs = len(pipe_out) - 2

captions = [
    "original",
    "warp GPU (linear, border clamp)",
    "warp CPU (nearest, fill)",
    "warp GPU (keep canvas size)",
]

fig = plt.figure(figsize=(16, 12))
# plt.suptitle(imagenet_synsets[pipe_out[0].at(n)[0]], fontsize=16)
columns = 2
rows = int(math.ceil(len_outputs / columns))
gs = gridspec.GridSpec(rows, columns)

print("Affine transform matrix:")
print(pipe_out[1].at(n))

for i in range(len_outputs):
    plt.subplot(gs[i])
    plt.axis("off")
    plt.title(captions[i])
    pipe_out_cpu = pipe_out[2 + i].as_cpu()
    img_chw = pipe_out_cpu.at(n)
    plt.imshow((img_chw) / 255.0)