<a href="https://colab.research.google.com/github/doudi25/Triton/blob/main/RGB_to_GRAYSCALE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import triton
import triton.language as tl
import torch
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cpu' if not torch.cuda.is_available() else 'cuda'

In [2]:
@triton.jit
def rgb_kernel(image_ptr,result_ptr,n_elements,stride_ab,BLOCK_SIZE:tl.constexpr):
  """Convert a planar RGB image to grayscale, BLOCK_SIZE pixels per program.

  Args:
    image_ptr: pointer to the first element of the red plane. The green and
      blue planes are read starting stride_ab and 2 * stride_ab elements later.
    result_ptr: pointer to the output buffer (n_elements values).
    n_elements: number of pixels in one colour plane.
    stride_ab: distance, in elements, between two consecutive colour planes.
    BLOCK_SIZE: compile-time number of pixels handled by one program.

  Pixels are addressed with a flat index inside each plane, so every plane is
  read as n_elements consecutive elements.
  """
  # index of this program along the 1D launch grid
  pid = tl.program_id(axis=0)
  # first pixel handled by this program
  block_start = pid * BLOCK_SIZE
  # flat pixel indices covered by this program
  offsets = block_start + tl.arange(0,BLOCK_SIZE)
  # guard the tail: the last program may run past the end of the plane
  mask = offsets < n_elements
  # red plane starts at the base pointer
  r_ptr = image_ptr + offsets
  # green plane: shift the SAME pixel index by one plane stride
  # (the plane offset is added, not multiplied into the pixel index)
  g_ptr = image_ptr + stride_ab + offsets
  # blue plane: shift the same pixel index by two plane strides
  b_ptr = image_ptr + 2 * stride_ab + offsets
  # load the red, green and blue values of each pixel
  red = tl.load(r_ptr,mask=mask)
  green = tl.load(g_ptr,mask=mask)
  blue = tl.load(b_ptr,mask=mask)
  # weighted sum of the three channels gives the gray value
  result = 0.299 * red + 0.587 * green + 0.114 * blue
  # write the gray value back at the same pixel index
  tl.store(result_ptr + offsets,result,mask=mask)

In [3]:

def convert_rgb_to_gray(image):
  """Convert a (3, H, W) RGB tensor to a (1, H, W) grayscale tensor with rgb_kernel.

  Args:
    image: 3-dimensional tensor whose first dimension has size 3.

  Returns:
    A tensor of shape (1, H, W) allocated on the same device as `image`,
    using torch's default dtype.

  Raises:
    AssertionError: if `image` is not 3-dimensional with 3 channels first.
  """
  # assert that the image is rgb
  assert image.ndim == 3 and image.shape[0] == 3
  # the kernel indexes each plane with a flat pixel offset, so the planes must
  # be laid out contiguously; this is a no-op for already-contiguous input
  image = image.contiguous()
  h , w = image.shape[1:]
  # allocate the output next to the input instead of hard-coding the device
  result = torch.empty((h,w),device=image.device)
  n_elements = int(h * w)
  # grid configuration: one program per BLOCK_SIZE pixels
  grid = lambda meta: (triton.cdiv(n_elements,meta['BLOCK_SIZE']),)
  # launch the kernel; stride(0) is the element distance between colour planes
  rgb_kernel[grid](image,result,n_elements,image.stride(0),BLOCK_SIZE=1024)
  # add the leading channel dim
  return result.unsqueeze(0)


In [4]:
from torchvision.io import read_image

In [7]:
# Read the PNG, cast its pixel values to float32, then move the tensor to the GPU.
image = read_image('/content/renaming.png').to(torch.float32).to('cuda')
# Run the Triton-backed grayscale conversion on the loaded image.
result = convert_rgb_to_gray(image)
