In [19]:
from mpi4py import MPI
from manapy.ast import Variable
from manapy.base.base import Struct
from manapy.ddm import Domain
from manapy.partitions import MeshPartition
from manapy.solvers.advec.tools_utils import initialisation_gaussian_2d
from manapy.solvers.ls import PETScKrylovSolver
import numpy as np
import time
from numba import cuda

import matplotlib.pyplot as plt

from timeit import default_timer as timer
from manapy.cuda.utils import (VarClass, GPU_Backend)
###############
# test_time
###############

def test_time(iter, fun):
  #fun()
  start_time = timer()
  for _ in range(iter):
    fun()
  end_time = timer()
  elapsed_time = (end_time - start_time) / iter
  print(f"{elapsed_time * 1000:.5f} ms")
  #print(f"{elapsed_time * 1000000:.5f} micros")

###############
# init
###############
def init(dim: int, mesh_path: str):
  """Partition the mesh, build the local domain and set up the pressure solver.

  The function forces ``float64`` on ``GPU_Backend``, runs ``MeshPartition``
  on ``mesh_path``, builds a ``Domain``, creates the field ``Variable``
  objects using the hard-coded ``"test_2"`` parameter set, and instantiates a
  ``PETScKrylovSolver`` for the pressure variable ``P``.

  Args:
    dim: Space dimension forwarded to ``MeshPartition`` and ``Domain``.
    mesh_path: Path of the mesh file forwarded to ``MeshPartition``.

  Returns:
    The tuple ``(domain, L, P, I, alpha_para, nbcells, visc, faces)`` where
    ``L`` is the ``PETScKrylovSolver``, ``P``, ``I`` and ``visc`` are
    ``Variable`` objects, ``alpha_para`` is the constant ``2e-6``, ``nbcells``
    is ``domain.nbcells`` and ``faces`` is ``domain.faces``.

  NOTE(review): a large part of the locals assigned below (``halos``,
  ``nodes``, ``nbnodes``, ``nbfaces``, ``backend``, ``signature``,
  ``filename``, ``tfinal``, ``u``, ``v``, ``w``, ``Pexact``, ``Iexact``, the
  scratch arrays, ...) is never read again inside this function. They look
  like leftovers from a full simulation script; whether constructing the
  unused ``Variable`` objects has side effects on ``domain`` cannot be told
  from here, so nothing was removed.
  """
  GPU_Backend.float_precision = 'float64'
  running_conf = Struct(backend="numba", signature=True, cache=True, float_precision="double")
  # The return value is discarded; the console output of the run shows
  # "Saving partition files ..." at this stage.
  # periodic=[0,0,0] presumably disables periodicity in every direction - TODO confirm.
  MeshPartition(mesh_path, dim=dim, conf=running_conf, periodic=[0,0,0])

  domain = Domain(dim=dim, conf=running_conf)
  faces = domain.faces
  cells = domain.cells
  halos = domain.halos
  nodes = domain.nodes

  nbnodes = domain.nbnodes
  nbfaces = domain.nbfaces
  nbcells = domain.nbcells
  backend = domain.backend
  signature = domain.signature

  # Hard-coded choice of parameter set; only the "test_2" branch below runs.
  test_para = 'test_2'

  # NOTE(review): this branch does not define C0, alpha0 or fi0, which are
  # used unconditionally further down, so switching test_para to "test_1"
  # would raise NameError. `fi` assigned here is also overwritten later by a
  # Variable of the same name.
  if test_para == "test_1" :
      fi    = 0.81
      U_n  = 3.e-3
      Pin = 1e5
      perm0 = 6.83e-9
      mu0 = 0.3
      tfinal = 122

      test = "pression"
      filename = "Geom_exp3.msh"


  if test_para == "test_2" :
      C0 = 0.
      alpha0 = 1.
      sigma_u = 0.85
      a = 1.
      A = 0.68
      Pin = 3e5
      mu0  = 0.109
      fi0 =  0.45
      U_n  = 1e-1
      tfinal = 1000

      test = "pression"
      # Not used: the mesh actually read is the `mesh_path` argument.
      filename = "TMesh.msh"

  # Boundary conditions for I: Dirichlet value 1 on "in", Neumann elsewhere.
  boundariesI = {"in" : "dirichlet",
                  "out" : "neumann",
                  "upper":"neumann",
                  "bottom":"neumann"
              }
  valuesI = {"in" : 1}

  I = Variable(domain=domain, BC=boundariesI, values=valuesI,  terms = ["Flux"])
  I.update_ghost_value()

  # Concentration: Dirichlet value C0 on "in", Neumann elsewhere.
  boundariesC = {"in" : "dirichlet",
                  "out" : "neumann",
                  "upper":"neumann",
                  "bottom":"neumann"
              }
  valuesC = {"in" : C0}
  C = Variable(domain=domain, BC=boundariesC, values=valuesC,  terms = ["Flux"])
  C.update_ghost_value()

  fiC = Variable(domain=domain, terms = ["Flux"])

  # Reference permeability components (filled per cell below).
  perm_x_0  = Variable(domain=domain)
  perm_y_0  = Variable(domain=domain)

  # Working copies of the permeability components.
  perm_x  = Variable(domain=domain)
  perm_y  = Variable(domain=domain)

  visc  = Variable(domain=domain)
  fi  = Variable(domain=domain)
  sigma = Variable(domain=domain)
  alpha = Variable(domain=domain)

  ## Initialization of viscosity, porosity and filtration coefficient:
  ## uniform values over all cells, then ghost values are refreshed.
  visc.cell[:]  = mu0
  fi.cell[:]  = fi0
  alpha.cell[:] = alpha0
  visc.update_ghost_value()
  fi.update_ghost_value()
  alpha.update_ghost_value()

  ## Initialization of the permeability: two sets of values, selected by
  ## comparing component 1 of the cell center (presumably the y coordinate -
  ## TODO confirm) with the threshold 0.0014.
  for i in range(nbcells):
      if cells.center[i][1] >= 0.0014:
          perm_x_0.cell[i] =  4.e-11
          perm_y_0.cell[i] =  2.e-11

      else:
          perm_x_0.cell[i] =  2.e-11
          perm_y_0.cell[i] =  4.e-11

  # Perm changes over time; start from the reference values.
  perm_x.cell[:] = perm_x_0.cell[:]
  perm_y.cell[:] = perm_y_0.cell[:]

  perm_x.update_ghost_value()
  perm_y.update_ghost_value()

  ## Injection mode: selects the boundary conditions of the pressure P.
  if test == "pression":
      # Imposed pressure: Pin on "in", 0 on "out".
      boundariesP = {"in" : "dirichlet",
                    "out" : "dirichlet",
                    "upper":"neumann",
                    "bottom":"neumann"
                  }
      valuesP = {"in" : Pin, "out": 0. }
      boundariesU = {"in" : "neumann",
                    "out" : "neumann",
                    "upper":"noslip",
                    "bottom":"noslip"}
      
      u  = Variable(domain=domain, BC=boundariesU)
      # NOTE(review): this `v` is replaced by a BC-less Variable right after
      # the if/elif, so the boundary conditions given here are dropped.
      v  = Variable(domain=domain, BC=boundariesU)


  elif test == "debit":
      # NOTE(review): this branch reads perm0, which only the "test_1"
      # parameter set defines.
      boundariesP = {"in" : "neumannNH",
                    "out" : "dirichlet",
                    "upper":"neumann",
                    "bottom":"neumann"
                  }
      cst = (mu0/perm0)*U_n
      valuesP = {"in" : cst, "out": 0. }

      boundariesU = {"in" : "dirichlet",
                    "out" : "neumann",
                    "upper":"neumann",
                    "bottom":"neumann"
                  }
      valuesU = {"in": U_n}
      u  = Variable(domain=domain)#, BC=boundariesU, values=valuesU)

  v  = Variable(domain=domain)
  w  = Variable(domain=domain)

  P = Variable(domain=domain, BC=boundariesP, values=valuesP)
  Pexact = Variable(domain=domain)
  Iexact = Variable(domain=domain)


  # Set I to 1 in cell faces.cellid[i][0] of every face listed in
  # domain.infaces (presumably the cells touching the "in" boundary - TODO confirm).
  for i in domain.infaces:
      K = faces.cellid[i][0]
      I.cell[K] = 1.

  # Scalar run parameters; only alpha_para is returned, the others are not
  # read again in this function.
  x0    = 0.
  cst = 0.
  # NOTE(review): shadows the `time` module imported at the top of the file
  # for the rest of this function.
  time = 0
  miter = 0
  niter = 1
  saving_at_node = 1
  order = 2
  cfl = 0.8
  alpha_para = 2e-6

  # Per-cell scratch arrays; not read again in this function.
  dissip_I = np.zeros(nbcells)
  src_C = np.zeros(nbcells)
  src_I = np.zeros(nbcells)
  div = np.zeros(nbcells)

  # Solver options passed as-is to PETScKrylovSolver.
  conf = Struct(reuse_mtx=False, with_mtx=True, scheme='diamond', verbose=False, 
                precond='gamg', sub_precond="amg",
                eps_a=1e-10, eps_r=1e-10, method="gmres")
                
  L = PETScKrylovSolver(domain=domain, var=P, conf=conf)


  # Bookkeeping containers; not read again in this function.
  Errors = []
  Times = []
  x_front = []
  c = 1
  d_t = 1e-4

  return (domain, L, P, I, alpha_para, nbcells, visc, faces)

In [20]:
import os

dim = 2

# Mesh to partition. Export MANAPY_MESH_FILE to point at another mesh without
# editing the notebook; the default is the path used for the recorded run.
# Other meshes used during development:
#   /home/aben-ham/Desktop/work/stage/my_manapy/manapy/mesh/2D/carre_hybrid.msh
#   /home/aben-ham/Desktop/work/stage/my_manapy/gpu_accelerator/functions/square_larger.msh
#   /home/aben-ham/Desktop/work/stage/my_manapy/gpu_accelerator/functions/square.msh
#   /home/ayoub.hamou/mesh/square.msh
#   /home/ayoub.hamou/mesh/square_larger.msh
DEFAULT_MESH_FILE = "/home/aben-ham/Desktop/work/stage/my_manapy/manapy/mesh/2D/carre.msh"
mesh_file = os.environ.get("MANAPY_MESH_FILE", DEFAULT_MESH_FILE)

# Fail early with an actionable message instead of an error from deep inside
# the mesh partitioner.
if not os.path.isfile(mesh_file):
    raise FileNotFoundError(
        f"Mesh file not found: {mesh_file!r}. "
        "Set the MANAPY_MESH_FILE environment variable to a valid mesh file."
    )

domain, L, P, I, alpha_para, nbcells, visc, faces = init(dim=dim, mesh_path=mesh_file)

Reading gmsh file ...
Saving partition files ...
Number of Cells: 105826
Number of Vertices: 53314
Local domain contruction ...
SetUp the Linear system ...


In [21]:
# Display the dtype of the face parameter arrays; the same dtype is reused
# when the all-ones array of the kernel argument list is built.
faces.param1.dtype

dtype('float64')

In [22]:
# Argument list shared by the CPU (numba) and GPU (cuda) versions of
# get_triplet_2d_with_contrib.
#? The order of the argument list is important: arguments are passed
#? positionally (`*args`) and must follow the kernel's parameter order.

# Convert the array attributes of these objects to VarClass (when the GPU
# function is called it reports its arguments as VarClass<dtype, shape>).
VarClass.convert_to_var_class([
    domain.nodes,
    domain.faces,
    domain.cells,
    domain.halos,
    domain,
    L,
    L.var,
    I,
    visc,
    P
])

args = [
  # --- args[0] .. args[9]
  domain.faces.cellid,
  domain.faces.nodeid,
  domain.cells.faceid,
  domain.nodes.vertex,
  domain.faces.halofid,
  domain.halos.halosext,
  domain.nodes.oldname,
  domain.cells.volume,
  domain.nodes.cellid,
  domain.cells.center,
  # --- args[10] .. args[19]
  domain.halos.centvol,
  domain.nodes.halonid,
  domain.nodes.periodicid,
  domain.nodes.ghostcenter,
  domain.nodes.haloghostcenter,
  domain.faces.airDiamond,
  domain.nodes.lambda_x,
  domain.nodes.lambda_y,
  domain.nodes.number,
  domain.nodes.R_x,
  # --- args[20] .. args[28]
  domain.nodes.R_y,
  faces.param1,
  faces.param2,
  faces.param3,
  faces.param4,
  domain.cells.shift,
  L.localsize,  # scalar argument (args[26])
  domain.cells.loctoglob,
  P.BCdirichlet,
  # --- args[29] .. args[31]: the (data, row, col) arrays of the solver; the
  # verification cell reads exactly these three indices.
  L._data,
  L._row,
  L._col,
  # --- args[32] .. args[40]
  L.matrixinnerfaces,
  domain.halofaces,
  P.dirichletfaces,
  I.cell,
  alpha_para,  # scalar argument (args[36])
  # All-ones array of length nbcells with the dtype of faces.param1; which
  # kernel parameter it stands for is not visible from this notebook.
  VarClass(np.ones(nbcells, dtype=(faces.param1.dtype))),
  visc.cell,
  P.BCneumannNH,
  faces.dist_ortho,
]



---------------------------------
---------------------------------
can't get attr _nbnodes => _nbnodes
can't get attr nbnodes => _nbnodes
can't set attr for R_x => can't set attribute
can't set attr for R_y => can't set attribute
can't set attr for R_z => can't set attribute
set att: _R_x
set att: _R_y
set att: _R_z
set att: _cellid
set att: _ghostcenter
set att: _ghostfaceinfo
set att: _ghostid
set att: _haloghostcenter
set att: _haloghostfaceinfo
set att: _haloghostid
set att: _halonid
set att: _lambda_x
set att: _lambda_y
set att: _lambda_z
set att: _loctoglob
set att: _name
set att: _number
set att: _oldname
set att: _periodicid
set att: _vertex
can't set attr for cellid => can't set attribute
can't set attr for ghostcenter => can't set attribute
can't set attr for ghostfaceinfo => can't set attribute
can't set attr for ghostid => can't set attribute
can't set attr for haloghostcenter => can't set attribute
can't set attr for haloghostfaceinfo => can't set attribute
can't set attr

In [23]:
# Reference CPU implementation and the factory that builds its CUDA counterpart.
from manapy.ast.functions2d import get_triplet_2d_with_contrib as cpu_function
from manapy.cuda.manapy.ast.cuda_functions2d import get_kernel_get_triplet_2d_with_contrib as gpu_function

# Compile the CPU function through the domain's backend; with echo=True the
# backend appears to print the signature it compiles for (see the cell output).
numba_fun = domain.backend.compile(cpu_function, echo=True, signature=True)
# gpu_function is a factory: calling it returns the callable that launches
# kernel_get_triplet_2d_with_contrib.
cuda_fun = gpu_function()

compile get_triplet_2d_with_contrib to cpu => signature=(int32[:,:], int32[:,:], int32[:,:], float64[:,:], int32[:], int32[:,:], int32[:], float64[:], int32[:,:], float64[:,:], float64[:,:], int32[:,:], int32[:,:], float64[:,:,:], float64[:,:,:], float64[:], float64[:], float64[:], int32[:], float64[:], float64[:], float64[:], float64[:], float64[:], float64[:], float64[:,:], int32, int32[:], int32[:], float64[:], int32[:], int32[:], int32[:], int32[:], int32[:], float64[:], float64, float64[:], float64[:], int32[:], float64[:])
compile device_search_element to cuda => signature=int32(int32[:], int32)
compile kernel_get_triplet_2d_with_contrib to cuda => signature=void(int32[:,:], int32[:,:], int32[:,:], float64[:,:], int32[:], int32[:,:], int32[:], float64[:], int32[:,:], float64[:,:], float64[:,:], int32[:,:], int32[:,:], float64[:,:,:], float64[:,:,:], float64[:], float64[:], float64[:], int32[:], float64[:], float64[:], float64[:], float64[:], float64[:], float64[:], float64[:,:], 

In [24]:
# Run the CPU version once outside the measurement (keeps any one-time cost
# out of the timing and fills the output arrays), then time repeated calls.
numba_fun(*args)
%timeit numba_fun(*args)

141 ms ± 5.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
cuda_fun(*args)
%timeit cuda_fun(*args)

CUDADispatcher(<function get_kernel_get_triplet_2d_with_contrib.<locals>.kernel_get_triplet_2d_with_contrib at 0x79c794a3bc10>) is called
VarClass<int32, (159139, 2)>
VarClass<int32, (159139, 3)>
VarClass<int32, (105826, 4)>
VarClass<float64, (53314, 4)>
VarClass<int32, (159139,)>
VarClass<int32, (2, 2)>
VarClass<int32, (53314,)>
VarClass<float64, (105826,)>
VarClass<int32, (53314, 10)>
VarClass<float64, (105826, 3)>
VarClass<float64, (2, 2)>
VarClass<int32, (53314, 2)>
VarClass<int32, (53314, 2)>
VarClass<float64, (53314, 2, 5)>
VarClass<float64, (53314, 1, 5)>
VarClass<float64, (159139,)>
VarClass<float64, (53314,)>
VarClass<float64, (53314,)>
VarClass<int32, (53314,)>
VarClass<float64, (53314,)>
VarClass<float64, (53314,)>
VarClass<float64, (159139,)>
VarClass<float64, (159139,)>
VarClass<float64, (159139,)>
VarClass<float64, (159139,)>
VarClass<float64, (105826, 3)>
<class 'numpy.int32'>
VarClass<int32, (105826,)>
VarClass<int32, (2,)>
VarClass<float64, (4488612,)>
VarClass<int32, 



19.3 ms ± 63.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [26]:
# verify: compare the triplet arrays produced on the GPU with the host ones.

#? float32 does not give results as precise as float64
# Positions of L._data, L._row and L._col inside `args`.
data_slot, row_slot, col_slot = 29, 30, 31
triplet_slots = (data_slot, row_slot, col_slot)

# Device-side copies, fetched back to the host.
device_parts = [VarClass.to_device(args[slot]).copy_to_host() for slot in triplet_slots]
gpu_res = device_parts[0] + device_parts[1] + device_parts[2]

# Host-side arrays.
host_parts = [args[slot] for slot in triplet_slots]
cpu_res = host_parts[0] + host_parts[1] + host_parts[2]

print(np.sum(cpu_res), " ==? ", np.sum(gpu_res))

# Sort both fingerprints in place before the element-wise comparison, so the
# check does not depend on the order in which the entries were written.
for fingerprint in (gpu_res, cpu_res):
    fingerprint.sort()

np.testing.assert_almost_equal(gpu_res, cpu_res, decimal=1)



467502347697.0212  ==?  467502347697.02124
