In [1]:
from mpi4py import MPI
from manapy.ast import Variable
from manapy.base.base import Struct
from manapy.ddm import Domain
from manapy.partitions import MeshPartition
from manapy.solvers.advec.tools_utils import initialisation_gaussian_2d
import numpy as np
import time
from numba import cuda
from manapy.solvers.advec import AdvectionSolver

import matplotlib.pyplot as plt

from timeit import default_timer as timer
from manapy.cuda.utils import (VarClass, GPU_Backend)

###############
# test_time
###############

def test_time(iter, fun):
  #fun()
  start_time = timer()
  for _ in range(iter):
    fun()
  end_time = timer()
  elapsed_time = (end_time - start_time) / iter
  print(f"{elapsed_time * 1000:.5f} ms")
  #print(f"{elapsed_time * 1000000:.5f} micros")

###############
# init
###############
def init(dim, mesh_path):
  GPU_Backend.float_precision = 'float64'
  running_conf = Struct(backend="numba", signature=True, cache=True, float_precision="double")
  MeshPartition(mesh_path, dim=dim, conf=running_conf, periodic=[0,0,0])

  domain = Domain(dim=dim, conf=running_conf)
  ne = Variable(domain=domain)
  u  = Variable(domain=domain)
  v  = Variable(domain=domain)
  w  = Variable(domain=domain)
  
  P = Variable(domain=domain)
  Pinit = 2.0
  cells = domain.cells
  initialisation_gaussian_2d(ne.cell, u.cell, v.cell, P.cell, cells.center, Pinit)

  u.face[:] = 2.
  v.face[:] = 2.
  w.face[:] = 1.
  
  u.interpolate_facetocell()
  v.interpolate_facetocell()
  w.interpolate_facetocell()

  
  return (domain, ne, u, v, w, P, running_conf)

In [6]:
dim = 2
mesh_file = "/home/aben-ham/Desktop/work/stage/my_manapy/manapy/mesh/2D/carre.msh"
#mesh_file = "/home/ayoub.hamou/mesh/square.msh"
#mesh_file = "/home/aben-ham/Desktop/work/stage/my_manapy/gpu_accelerator/functions/square.msh"
domain, ne, u, v, w, P, running_conf = init(dim=dim, mesh_path=mesh_file)

Reading gmsh file ...
Saving partition files ...
Number of Cells: 105826
Number of Vertices: 53314
Local domain contruction ...


In [33]:
# args list
#? The order is imported of the argument list

w_c = np.array(np.arange(0, 100), dtype=GPU_Backend.float_precision)
w_halosend = np.array([0, 0], dtype=GPU_Backend.float_precision)
indsend = np.array([8], dtype='int32')

args = [
  VarClass(w_c),
  VarClass(w_halosend),
  VarClass(indsend),
]

for arg in args:
  print(type(arg), arg.shape, arg.dtype)

<class 'manapy.cuda.utils.VarClass.VarClass'> (100,) float64
<class 'manapy.cuda.utils.VarClass.VarClass'> (2,) float64
<class 'manapy.cuda.utils.VarClass.VarClass'> (1,) int32


In [19]:
from manapy.comms.communication import define_halosend as cpu_function
from manapy.cuda.manapy.comms.cuda_communication import get_kernel_define_halosend as gpu_function

numba_fun = domain.backend.compile(cpu_function, echo=True, signature=True)
cuda_fun = gpu_function()

compile define_halosend to cpu => signature=(float64[:], float64[:], int32[:])
compile kernel_define_halosend to cuda => signature=void(float64[:], float64[:], int32[:])


In [34]:
numba_fun(*args)
#%timeit numba_fun(*args)

In [35]:
cuda_fun(*args)
#%timeit cuda_fun(*args)

In [36]:
# verify

def plot(a):
  b = VarClass.to_device(a).copy_to_host()
  plt.plot(a, label="cpu")
  plt.plot(b, label="gpu")
  plt.legend()
  plt.show()

def verify(a, decimal):
  b = VarClass.to_device(a)
  np.testing.assert_almost_equal(a, b, decimal=decimal)


for i, arg in enumerate(args):
  print(f"{i + 1} => test arg {arg}")
  verify(arg, decimal=2)

1 => test arg [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.
 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.
 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.
 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.
 90. 91. 92. 93. 94. 95. 96. 97. 98. 99.]
2 => test arg [8. 8.]


AssertionError: 
Arrays are not almost equal to 2 decimals

Mismatched elements: 1 / 2 (50%)
Max absolute difference: 8.
Max relative difference: 0.
 x: VarClass([8., 8.])
 y: array([8., 0.])