In [None]:
# Install NVIDIA DALI built for a compatible CUDA version (the package below targets CUDA 11.0)
#!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110

### Defining the Pipeline



In [None]:
# Importing the required libraries
from nvidia.dali.pipeline import Pipeline
from nvidia.dali import pipeline_def
import nvidia.dali.fn as fn
import nvidia.dali.types as types

# Directory passed to the file reader as `file_root`.
image_dir = "data/images"
# Batch size used when instantiating the pipelines in this notebook.
max_batch_size = 8

# Define the pipeline with the `pipeline_def` decorator.
@pipeline_def
def simple_pipeline():
    """Read encoded image files with their labels and decode the images on the CPU.

    Returns:
        A pair ``(images, labels)``: the decoded images and the labels
        produced by the file reader.
    """
    # Read the encoded images and their labels from `image_dir`.
    encoded, labels = fn.readers.file(file_root=image_dir)
    # Decode the JPEG-encoded images to RGB on the CPU.
    decoded = fn.decoders.image(encoded, device="cpu")
    return decoded, labels

### Building the Pipeline

In [None]:
# Instantiate the simple pipeline with the notebook-wide batch size, then build it.
pipe = simple_pipeline(
    batch_size=max_batch_size,
    num_threads=1,
    device_id=0,
)
pipe.build()

### Running the Pipeline



In [None]:
# Run the pipeline once and print the returned outputs.
pipe_out = pipe.run()
print(pipe_out)

In [None]:
# Unpack the pipeline outputs and report whether each one is a dense tensor.
images, labels = pipe_out
print(f"Images is_dense_tensor: {images.is_dense_tensor()}")
print(f"Labels is_dense_tensor: {labels.is_dense_tensor()}")

In [None]:
# Inspect the labels: convert them to a single tensor, then show its shape and values.
import numpy as np

labels_tensor = labels.as_tensor()

print(labels_tensor.shape())
print(np.array(labels_tensor))

In [None]:
#Plotting the decoded images 
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
%matplotlib inline

def show_images(image_batch):
    """Plot every image of a batch in a grid that is four columns wide.

    The grid is sized from the batch itself (``len(image_batch)``) rather than
    from the global ``max_batch_size``, so batches of any size are displayed
    completely and no index past the end of the batch is requested.

    Args:
        image_batch: A CPU batch of decoded images that supports ``len()`` and
            ``.at(index)`` (GPU batches must be converted with ``.as_cpu()``
            by the caller first).
    """
    columns = 4
    n_images = len(image_batch)
    if n_images == 0:
        # Nothing to draw; a grid with zero rows is not valid.
        return
    # Ceiling division so that a partially filled last row is still shown.
    rows = (n_images + columns - 1) // columns
    plt.figure(figsize=(32, (32 // columns) * rows))
    gs = gridspec.GridSpec(rows, columns)
    # Only iterate over images that exist; trailing grid cells stay empty.
    for j in range(n_images):
        plt.subplot(gs[j])
        plt.axis("off")
        plt.imshow(image_batch.at(j))

In [None]:
# Display the decoded images returned by the simple pipeline.
show_images(images)

## Adding Augmentations



In [None]:
# Add random shuffling so that batches contain a mix of all classes.
@pipeline_def
def shuffled_pipeline():
    """Read the image files in shuffled order and decode them on the CPU.

    Returns:
        A pair ``(images, labels)``: the decoded images and their labels.
    """
    # initial_fill sets the capacity of the buffer used for shuffling;
    # 21 is used here because the dataset is small.
    encoded, labels = fn.readers.file(
        file_root=image_dir,
        random_shuffle=True,
        initial_fill=21,
    )
    decoded = fn.decoders.image(encoded, device="cpu")
    return decoded, labels

In [None]:
# Instantiate the shuffled pipeline with a fixed seed, then build it.
pipe = shuffled_pipeline(
    batch_size=max_batch_size,
    num_threads=1,
    device_id=0,
    seed=1234,
)
pipe.build()

In [None]:
# Run the shuffled pipeline once and display the returned images.
pipe_out = pipe.run()
images, labels = pipe_out
show_images(images)



### Augmentations



In [None]:
# Add a rotation to introduce more variety into the dataset.
@pipeline_def
def rotated_pipeline():
    """Read shuffled image files, decode them on the CPU and rotate each image.

    Every image is rotated by the same fixed angle (``angle=10.0``) with
    ``fill_value=0``.

    Returns:
        A pair ``(rotated_images, labels)``.
    """
    encoded, labels = fn.readers.file(
        file_root=image_dir,
        random_shuffle=True,
        initial_fill=21,
    )
    decoded = fn.decoders.image(encoded, device="cpu")
    rotated = fn.rotate(decoded, angle=10.0, fill_value=0)
    return rotated, labels

To rotate the images, we added a new operation to our pipeline: `fn.rotate`.

In [None]:
# Instantiate the rotated pipeline with a fixed seed, then build it.
pipe = rotated_pipeline(
    batch_size=max_batch_size,
    num_threads=1,
    device_id=0,
    seed=1234,
)
pipe.build()

In [None]:
# Run the rotated pipeline once and display the rotated images.
pipe_out = pipe.run()
images, labels = pipe_out
show_images(images)

### Tensors as Arguments and Random Number Generation


In [None]:
# Rotate by a randomly generated angle instead of a fixed one.
@pipeline_def
def random_rotated_pipeline():
    """Read shuffled image files, decode them on the CPU and rotate them randomly.

    The rotation angle is produced by ``fn.random.uniform`` over the range
    ``(-10.0, 10.0)`` and passed to ``fn.rotate`` as its ``angle`` argument.

    Returns:
        A pair ``(rotated_images, labels)``.
    """
    encoded, labels = fn.readers.file(
        file_root=image_dir,
        random_shuffle=True,
        initial_fill=21,
    )
    decoded = fn.decoders.image(encoded, device="cpu")
    # The output of the random generator is used as the `angle` argument.
    random_angle = fn.random.uniform(range=(-10.0, 10.0))
    rotated = fn.rotate(decoded, angle=random_angle, fill_value=0)
    return rotated, labels

In [None]:
# Instantiate the randomly rotated pipeline with a fixed seed, then build it.
pipe = random_rotated_pipeline(
    batch_size=max_batch_size,
    num_threads=1,
    device_id=0,
    seed=1234,
)
pipe.build()

In [None]:
# Run the randomly rotated pipeline once and display the returned images.
pipe_out = pipe.run()
images, labels = pipe_out
show_images(images)

In [None]:
# Same random rotation, but the decoded images are copied to the GPU first.
@pipeline_def
def random_rotated_gpu_pipeline():
    """Decode shuffled images on the CPU, copy them to the GPU and rotate them randomly.

    The rotation angle is produced by ``fn.random.uniform`` over the range
    ``(-10.0, 10.0)``.

    Returns:
        A pair ``(rotated_images, labels)``.
    """
    encoded, labels = fn.readers.file(
        file_root=image_dir,
        random_shuffle=True,
        initial_fill=21,
    )
    decoded = fn.decoders.image(encoded, device="cpu")
    random_angle = fn.random.uniform(range=(-10.0, 10.0))
    # `.gpu()` copies the decoded images to the GPU; the rotation then receives GPU data.
    gpu_images = decoded.gpu()
    rotated = fn.rotate(gpu_images, angle=random_angle, fill_value=0)
    return rotated, labels

In [None]:
# Instantiate the GPU rotation pipeline with a fixed seed, then build it.
pipe = random_rotated_gpu_pipeline(
    batch_size=max_batch_size,
    num_threads=1,
    device_id=0,
    seed=1234,
)
pipe.build()

In [None]:
# Run the GPU rotation pipeline once and print the returned outputs.
pipe_out = pipe.run()
print(pipe_out)

In [None]:
# To visualize a TensorListGPU, it first has to be copied to the CPU with as_cpu().
images, labels = pipe_out
show_images(images.as_cpu())

In [None]:
# Mixed decoding splits the decoding process between the CPU and the GPU.
@pipeline_def
def hybrid_pipeline():
    """Read shuffled image files and decode them with the ``mixed`` decoder device.

    Returns:
        A pair ``(images, labels)``: the decoded images and their labels.
    """
    encoded, labels = fn.readers.file(
        file_root=image_dir,
        random_shuffle=True,
        initial_fill=21,
    )
    decoded = fn.decoders.image(encoded, device="mixed")
    return decoded, labels

In [None]:
# Instantiate the hybrid (mixed decoding) pipeline with a fixed seed, then build it.
pipe = hybrid_pipeline(
    batch_size=max_batch_size,
    num_threads=1,
    device_id=0,
    seed=1234,
)
pipe.build()

In [None]:
# Run the hybrid pipeline once; the images are copied to the CPU with as_cpu() before plotting.
pipe_out = pipe.run()
images, labels = pipe_out
show_images(images.as_cpu())

In [None]:
# Measuring and comparing the throughput of CPU decoding and GPU (mixed) decoding.
from timeit import default_timer as timer

# Batch size passed to speedtest() in the benchmark cells.
test_batch_size = 64

def speedtest(pipeline, batch, n_threads, n_test=20, n_warmup=5):
    """Build a pipeline and print its throughput in images per second.

    Args:
        pipeline: Callable that accepts ``batch_size``, ``num_threads`` and
            ``device_id`` keyword arguments and returns an object with
            ``build()`` and ``run()`` methods (e.g. a ``@pipeline_def`` function).
        batch: Batch size passed to the pipeline and used to compute the rate.
        n_threads: Number of threads passed to the pipeline.
        n_test: Number of timed ``run()`` calls (default 20).
        n_warmup: Number of untimed ``run()`` calls made before timing
            starts (default 5).

    Returns:
        None. The measured speed is printed.
    """
    pipe = pipeline(batch_size=batch, num_threads=n_threads, device_id=0)
    pipe.build()
    # Warm-up iterations are excluded from the measurement.
    for _ in range(n_warmup):
        pipe.run()
    # Timed iterations.
    t_start = timer()
    for _ in range(n_test):
        pipe.run()
    elapsed = timer() - t_start
    # Guard against a zero interval (timer resolution) to avoid ZeroDivisionError.
    rate = (n_test * batch) / elapsed if elapsed > 0 else float("inf")
    print("Speed: {} imgs/s".format(rate))

In [None]:
# Throughput (imgs/s) with CPU decoding: shuffled_pipeline decodes with device='cpu'.
speedtest(shuffled_pipeline, test_batch_size, 4)

In [None]:
# Throughput (imgs/s) with mixed CPU/GPU decoding: hybrid_pipeline decodes with device='mixed'.
speedtest(hybrid_pipeline, test_batch_size, 4)

As we can see, using GPU-accelerated decoding resulted in a significant speedup.