In [1]:
from mpi4py import MPI
from manapy.ast import Variable
from manapy.base.base import Struct
from manapy.ddm import Domain
from manapy.partitions import MeshPartition
from manapy.solvers.advec.tools_utils import initialisation_gaussian_2d
from manapy.solvers.ls import PETScKrylovSolver
import numpy as np
import time
from numba import cuda

import matplotlib.pyplot as plt

from timeit import default_timer as timer

###############
# test_time
###############

def test_time(iter, fun):
  #fun()
  start_time = timer()
  for _ in range(iter):
    fun()
  end_time = timer()
  elapsed_time = (end_time - start_time) / iter
  print(f"{elapsed_time * 1000:.5f} ms")
  #print(f"{elapsed_time * 1000000:.5f} micros")

###############
# init
###############
def init(dim, mesh_path):
  running_conf = Struct(backend="numba", signature=True, cache=True, precision="double")
  MeshPartition(mesh_path, dim=dim, conf=running_conf, periodic=[0,0,0])

  domain = Domain(dim=dim, conf=running_conf)
  faces = domain.faces
  cells = domain.cells
  halos = domain.halos
  nodes = domain.nodes

  nbnodes = domain.nbnodes
  nbfaces = domain.nbfaces
  nbcells = domain.nbcells
  backend = domain.backend
  signature = domain.signature

  test_para = 'test_2'

  if test_para == "test_1" :
      fi    = 0.81
      U_n  = 3.e-3
      Pin = 1e5
      perm0 = 6.83e-9
      mu0 = 0.3
      tfinal = 122

      test = "pression"
      filename = "Geom_exp3.msh"


  if test_para == "test_2" :
      C0 = 0.
      alpha0 = 1.
      sigma_u = 0.85
      a = 1.
      A = 0.68
      Pin = 3e5
      mu0  = 0.109
      fi0 =  0.45
      U_n  = 1e-1
      tfinal = 1000

      test = "pression"
      filename = "TMesh.msh"

  boundariesI = {"in" : "dirichlet",
                  "out" : "neumann",
                  "upper":"neumann",
                  "bottom":"neumann"
              }
  valuesI = {"in" : 1}

  I = Variable(domain=domain, BC=boundariesI, values=valuesI,  terms = ["Flux"])
  I.update_ghost_value()

  # Concentration
  boundariesC = {"in" : "dirichlet",
                  "out" : "neumann",
                  "upper":"neumann",
                  "bottom":"neumann"
              }
  valuesC = {"in" : C0}
  C = Variable(domain=domain, BC=boundariesC, values=valuesC,  terms = ["Flux"])
  C.update_ghost_value()

  fiC = Variable(domain=domain, terms = ["Flux"])

  perm_x_0  = Variable(domain=domain)
  perm_y_0  = Variable(domain=domain)

  perm_x  = Variable(domain=domain)
  perm_y  = Variable(domain=domain)

  visc  = Variable(domain=domain)
  fi  = Variable(domain=domain)
  sigma = Variable(domain=domain)
  alpha = Variable(domain=domain)

  ## initialization of viscosity, porosity and filtration coieficient
  visc.cell[:]  = mu0
  fi.cell[:]  = fi0
  alpha.cell[:] = alpha0
  visc.update_ghost_value()
  fi.update_ghost_value()
  alpha.update_ghost_value()

  ## initialization of the permeability
  for i in range(nbcells):
      if cells.center[i][1] >= 0.0014:
          perm_x_0.cell[i] =  4.e-11
          perm_y_0.cell[i] =  2.e-11

      else:
          perm_x_0.cell[i] =  2.e-11
          perm_y_0.cell[i] =  4.e-11

  # Perm changes over time
  perm_x.cell[:] = perm_x_0.cell[:]
  perm_y.cell[:] = perm_y_0.cell[:]

  perm_x.update_ghost_value()
  perm_y.update_ghost_value()

  ## injection mode
  if test == "pression":
      boundariesP = {"in" : "dirichlet",
                    "out" : "dirichlet",
                    "upper":"neumann",
                    "bottom":"neumann"
                  }
      valuesP = {"in" : Pin, "out": 0. }
      boundariesU = {"in" : "neumann",
                    "out" : "neumann",
                    "upper":"noslip",
                    "bottom":"noslip"}
      
      u  = Variable(domain=domain, BC=boundariesU)
      v  = Variable(domain=domain, BC=boundariesU)


  elif test == "debit":
      boundariesP = {"in" : "neumannNH",
                    "out" : "dirichlet",
                    "upper":"neumann",
                    "bottom":"neumann"
                  }
      cst = (mu0/perm0)*U_n
      valuesP = {"in" : cst, "out": 0. }

      boundariesU = {"in" : "dirichlet",
                    "out" : "neumann",
                    "upper":"neumann",
                    "bottom":"neumann"
                  }
      valuesU = {"in": U_n}
      u  = Variable(domain=domain)#, BC=boundariesU, values=valuesU)

  v  = Variable(domain=domain)
  w  = Variable(domain=domain)

  P = Variable(domain=domain, BC=boundariesP, values=valuesP)
  Pexact = Variable(domain=domain)
  Iexact = Variable(domain=domain)


  for i in domain.infaces:
      K = faces.cellid[i][0]
      I.cell[K] = 1.

  x0    = 0.
  cst = 0.
  time = 0
  miter = 0
  niter = 1
  saving_at_node = 1
  order = 2
  cfl = 0.8
  alpha_para = 2e-6

  dissip_I = np.zeros(nbcells)
  src_C = np.zeros(nbcells)
  src_I = np.zeros(nbcells)
  div = np.zeros(nbcells)

  conf = Struct(reuse_mtx=False, with_mtx=True, scheme='diamond', verbose=False, 
                precond='gamg', sub_precond="amg",
                eps_a=1e-10, eps_r=1e-10, method="gmres")
                
  L = PETScKrylovSolver(domain=domain, var=P, conf=conf)


  Errors = []
  Times = []
  x_front = []
  c = 1
  d_t = 1e-4

  return (domain, L, P, I, alpha_para, nbcells, visc, faces)

In [2]:
dim = 2
mesh_file = "/home/aben-ham/Desktop/work/stage/my_manapy/manapy/mesh/2D/carre.msh"
#mesh_file = "/home/aben-ham/Desktop/work/stage/my_manapy/manapy/mesh/2D/carre_hybrid.msh"
#mesh_file = "/home/aben-ham/Desktop/work/stage/my_manapy/gpu_accelerator/functions/square_larger.msh"
#mesh_file = "/home/aben-ham/Desktop/work/stage/my_manapy/gpu_accelerator/functions/square.msh"
#mesh_file = "/home/ayoub.hamou/mesh/square.msh"
#mesh_file = "/home/ayoub.hamou/mesh/square_larger.msh"
domain, L, P, I, alpha_para, nbcells, visc, faces = init(dim=dim, mesh_path=mesh_file)

Reading gmsh file ...
Saving partition files ...
Number of Cells: 105826
Number of Vertices: 53314
Local domain contruction ...
SetUp the Linear system ...


In [56]:
from numba import cuda

def create_var(v):
  return (v, cuda.to_device(v))

#! need input
#! check if there is a primitive type value

row = L._row
col = L._col
data = L._data
P_ghost = np.array([])
Icell = I.cell
perm  = 1.
visc = 2.
alpha  = 2.
cellfid  = domain.faces.cellid
volume = domain.cells.volume
faceidc = domain.cells.faceid
mesuref  = 0. #?
matrixinnerfaces  = L.matrixinnerfaces
dirichletfaces = L.var.dirichletfaces
dist = domain.faces.dist_ortho

host_row, d_row = create_var(row)
host_col, d_col = create_var(col)
host_data, d_data = create_var(data)
host_P_ghost, d_P_ghost = create_var(P_ghost)
host_Icell, d_Icell = create_var(Icell)
host_perm, d_perm = create_var(perm)
host_visc, d_visc = create_var(visc)
host_alpha, d_alpha = create_var(alpha)
host_cellfid, d_cellfid = create_var(cellfid)
host_volume, d_volume = create_var(volume)
host_faceidc, d_faceidc = create_var(faceidc)
host_mesuref, d_mesuref = create_var(mesuref)
host_matrixinnerfaces, d_matrixinnerfaces = create_var(matrixinnerfaces)
host_dirichletfaces, d_dirichletfaces = create_var(dirichletfaces)
host_dist, d_dist = create_var(dist)

In [58]:
#The original function from functions2d.py

from numpy import  int32, float32, uint32
import numpy as np

def Mat_Assembly(row, col, data,
                 P_ghost, Icell, 
                 perm, visc, alpha, 
                 cellfid, volume, faceidc,
                 mesuref, matrixinnerfaces, 
                 dirichletfaces, dist):
    
    row[:] = 0
    col[:] = 0
    data[:] = 0.

    cmpt = 0
    for face in matrixinnerfaces:
        nbfL = faceidc[cellfid[0][0]][-1]
        nbfR = faceidc[cellfid[0][1]][-1]
        K = cellfid[face][0] 
        L = cellfid[face][1]
        mesure = mesuref[face]
        volumeK = volume[K]
        volumeL = volume[L]
        
        row[cmpt] = K
        col[cmpt] = K
        data[cmpt] = - Icell[K]*(perm/visc)*(mesure/dist[face]) + (1/nbfL)*volumeK*alpha*(1 - Icell[K])
        cmpt = cmpt + 1
        
        row[cmpt] = L
        col[cmpt] = L
        data[cmpt] = - Icell[L]*(perm/visc)*(mesure/dist[face]) + (1/nbfR)*volumeL*alpha*(1 - Icell[L])
        cmpt = cmpt + 1
        
        row[cmpt] = K
        col[cmpt] = L
        data[cmpt] = Icell[K]*(perm/visc)*(mesure/dist[face]) 
        cmpt = cmpt + 1
        
        row[cmpt] = L
        col[cmpt] = K
        data[cmpt] = Icell[L]*(perm/visc)*(mesure/dist[face]) 
        cmpt = cmpt + 1
        
    for face in dirichletfaces: 
        nbfL = faceidc[cellfid[0][0]][-1]
        K = cellfid[face][0] 
        mesure = mesuref[face]
        volumeK = volume[K]
        
        row[cmpt] = K
        col[cmpt] = K
        data[cmpt] = - Icell[K]*(perm/visc)*(mesure/dist[face]) + (1/nbfL)*volumeK*alpha*(1 - Icell[K])
        cmpt = cmpt + 1


In [59]:
#? using numba jit -> backend.py
#! Using parallel=True yields poor results.
import numba

numba_Mat_Assembly = numba.jit(Mat_Assembly, nopython=True, fastmath=True, parallel=False, cache=True)
def cpu_Mat_Assembly():
  return numba_Mat_Assembly(
    host_row,
    host_col,
    host_data,
    host_P_ghost,
    host_Icell,
    host_perm,
    host_visc,
    host_alpha,
    host_cellfid,
    host_volume,
    host_faceidc,
    host_mesuref,
    host_matrixinnerfaces,
    host_dirichletfaces,
    host_dist,
  )


In [60]:
#time taken by cpu_Mat_Assembly


cpu_Mat_Assembly()
test_time(70, cpu_Mat_Assembly)

134.03772 ms


In [66]:
@cuda.jit()
def kernel_Mat_Assembly(row, col, data,# b, 
                 P_ghost, Icell, 
                 perm, visc, alpha, 
                 cellfid, volume, faceidc,
                 mesuref, matrixinnerfaces, 
                 dirichletfaces, dist):

    idx = cuda.grid(1)
    
    #? row[:] = 0
    #? col[:] = 0
    #? data[:] = 0.

    cmpt = min(idx, matrixinnerfaces.shape[0]) * 4 + min(idx, dirichletfaces.shape[0])

    #cmpt = 0

    if idx < matrixinnerfaces.shape[0]:
        face = matrixinnerfaces[idx]

        nbfL = faceidc[cellfid[0][0]][-1]
        nbfR = faceidc[cellfid[0][1]][-1]
        K = cellfid[face][0] 
        L = cellfid[face][1]
        mesure = mesuref[face]
        volumeK = volume[K]
        volumeL = volume[L]
        
        row[cmpt] = K
        col[cmpt] = K
        data[cmpt] = - Icell[K]*(perm/visc)*(mesure/dist[face]) + (1/nbfL)*volumeK*alpha*(1 - Icell[K])
        cmpt = cmpt + 1
        
        row[cmpt] = L
        col[cmpt] = L
        data[cmpt] = - Icell[L]*(perm/visc)*(mesure/dist[face]) + (1/nbfR)*volumeL*alpha*(1 - Icell[L])
        cmpt = cmpt + 1
        
        row[cmpt] = K
        col[cmpt] = L
        data[cmpt] = Icell[K]*(perm/visc)*(mesure/dist[face]) 
        cmpt = cmpt + 1
        
        row[cmpt] = L
        col[cmpt] = K
        data[cmpt] = Icell[L]*(perm/visc)*(mesure/dist[face]) 
        cmpt = cmpt + 1
        
    if idx < dirichletfaces.shape[0]:
        face = dirichletfaces[idx]

        nbfL = faceidc[cellfid[0][0]][-1]
        K = cellfid[face][0] 
        mesure = mesuref[face]
        volumeK = volume[K]
        
        row[cmpt] = K
        col[cmpt] = K
        data[cmpt] = - Icell[K]*(perm/visc)*(mesure/dist[face]) + (1/nbfL)*volumeK*alpha*(1 - Icell[K])
        cmpt = cmpt + 1
        




In [73]:
import numba

d_cmpt = cuda.to_device(np.array([0]).astype(int))

nb_element = max(
    d_matrixinnerfaces.shape[0],
    d_dirichletfaces.shape[0],
  )

nb_threads = 32
nb_blocks = nb_element // nb_threads + 1
print(nb_blocks)

@cuda.jit
def kernel_zero(arr_out):
  i = cuda.grid(1)
  if i < arr_out.shape[0]:
    arr_out[i] = 0

def cuda_Mat_Assembly():
  kernel_zero[1, 1](d_cmpt)
  kernel_zero[d_row.shape[0] // nb_threads + 1, nb_threads](d_row)
  kernel_zero[d_col.shape[0] // nb_threads + 1, nb_threads](d_col)
  kernel_zero[d_data.shape[0] // nb_threads + 1, nb_threads](d_data)
  cuda.synchronize()
  kernel_Mat_Assembly[nb_blocks, nb_threads](
    d_row,
    d_col,
    d_data,
    d_P_ghost,
    d_Icell,
    d_perm,
    d_visc,
    d_alpha,
    d_cellfid,
    d_volume,
    d_faceidc,
    d_mesuref,
    d_matrixinnerfaces,
    d_dirichletfaces,
    d_dist,
    #d_cmpt
  )
  cuda.synchronize()

cuda_Mat_Assembly()
test_time(70, cuda_Mat_Assembly)

4949




23.63369 ms
