In [1]:
import numpy as np
import cupy as cp # import CuPy library

In [2]:
# CUDA source
loaded_from_source = r'''
  extern "C" {
  // C1Matrix: adds A to B rotated by 180 degrees,
  //   C[i][j] = A[i][j] + B[N - 1 - i][M - 1 - j].
  // A, B and C are addressed as row-major N x M float matrices, i.e. element
  // (i, j) lives at offset i * M + j. Each thread handles a single element:
  // the x launch dimension selects the row, the y launch dimension the column.
__global__ void C1Matrix(float *A, float *B, float *C, int N, int M){
  const int row = blockIdx.x * blockDim.x + threadIdx.x;
  const int col = blockIdx.y * blockDim.y + threadIdx.y;

  // Threads that fall outside the matrix (partially filled blocks) do nothing.
  if(row >= N || col >= M){
    return;
  }

  const int dst = row * M + col;                      // offset of (row, col)
  const int src = (N - row - 1) * M + (M - col - 1);  // offset of (N-1-row, M-1-col)
  C[dst] = A[dst] + B[src];
}

// C2Matrix: element-wise blend of A and B,
//   C[i][j] = alpha * A[i][j] + (1 - alpha) * B[i][j], intended for alpha in [0, 1].
// A, B and C are addressed as row-major N x M float matrices (offset i * M + j).
// alpha is a double, so the blend is evaluated in double precision and then
// narrowed to float when it is stored into C.
// Each thread handles a single element: the x launch dimension selects the row,
// the y launch dimension the column.
__global__ void C2Matrix(double alpha, float *A, float *B, float *C, int N, int M){
  const int row = blockIdx.x * blockDim.x + threadIdx.x;
  const int col = blockIdx.y * blockDim.y + threadIdx.y;

  // Threads that fall outside the matrix (partially filled blocks) do nothing.
  if(row >= N || col >= M){
    return;
  }

  const int idx = row * M + col;   // offset of (row, col)
  const double beta = 1 - alpha;   // weight applied to B
  C[idx] = alpha*A[idx] + beta*B[idx];
}
}'''

# Compile the CUDA source at runtime and look up both kernels by name.
module = cp.RawModule(code = loaded_from_source)
C1Matrix = module.get_function('C1Matrix') # CUDA kernels can be retrieved by calling get_function()
C2Matrix = module.get_function('C2Matrix')

# Matrix dimensions are element counts, so they are kept as integers.
N = 2 # number of rows
M = 3 # number of columns
alpha = 0.1 # blending weight used by C2Matrix, expected in [0, 1]

A = 2.1*cp.arange(1.0, N*M+1, dtype=cp.float32).reshape(N, M) # Matrix A_{nxm}
B = 4.1*cp.arange(1.0, N*M+1, dtype=cp.float32).reshape(N, M) # Matrix B_{nxm}
C1 = cp.zeros((N, M), dtype=cp.float32) # Matrix C1_{nxm} := C[i][j] = A[i][j] + B[N - i - 1][M - j - 1]
C2 = cp.zeros((N, M), dtype=cp.float32) # Matrix C2_{nxm} := C[i][j] = (a)A[i][j] + (1 - a)B[i][j]; with a in [0, 1]

# One thread per matrix element. The grid is rounded up so that it covers the
# whole matrix: x spans the N rows, y spans the M columns (the kernels discard
# the surplus threads with a bounds check).
block_size = (16, 16)
grid_size = ((N + block_size[0] - 1) // block_size[0], (M + block_size[1] - 1) // block_size[1])

# CuPy neither validates nor converts raw-kernel arguments: a Python int is
# passed as a 64-bit `long long` and a Python float as a `double`. The kernels
# declare `int N, int M`, so the scalars are given explicit NumPy types that
# match the CUDA signatures instead of relying on the low 32 bits being read.
C1Matrix(grid_size, block_size, (A, B, C1, np.int32(N), np.int32(M))) # launch: (grid, block, args)
C2Matrix(grid_size, block_size, (np.float64(alpha), A, B, C2, np.int32(N), np.int32(M)))

# Cross-check both kernels against the equivalent CuPy array expressions.
assert cp.allclose(C1, A + B[::-1, ::-1]), "C1Matrix result does not match the CuPy reference"
assert cp.allclose(C2, alpha*A + (1 - alpha)*B), "C2Matrix result does not match the CuPy reference"

print(A)
print(B)
print(C1) # Matrix C1_{nxm}
print(C2) # Matrix C2_{nxm}


[[ 2.1        4.2        6.2999997]
 [ 8.4       10.5       12.599999 ]]
[[ 4.1       8.2      12.299999]
 [16.4      20.5      24.599998]]
[[26.699999 24.7      22.699999]
 [20.699999 18.7      16.699999]]
[[ 3.8999999  7.7999997 11.699999 ]
 [15.599999  19.5       23.399998 ]]
