# Imports

In [1]:
import numpy as np
import cupy as cp
from numba import cuda, vectorize
from numpy import format_float_scientific as fs
import matplotlib.pyplot as plt
from matplotlib import cm
import time
import math

from numba_kernels import * 
from Timers import Timer, TimersManager
from lbmFlowAroundCylinder import inivel, obstacle_fun

## Timers definition

In [2]:
timers = TimersManager()
timers.add("main")
timers.add("equilibrium")
timers.add("collision")
timers.add("streaming")
timers.add("macroscopic")
timers.add("rightwall")
timers.add("leftwall")
timers.add("fin_inflow")
timers.add("bounceback")
timers.add("move_gpu->cpu")
timers.add("move_cpu->gpu")

## Flow definitions

In [3]:
maxIter = 2000    # Total number of time iterations.
Re = 150.0          # Reynolds number.
nx, ny = 4200, 4000   # Numer of lattice nodes.
ly = ny-1           # Height of the domain in lattice units.
cx, cy, r = nx//4, ny//2, ny//9 # Coordinates of the cylinder.
uLB     = 0.04                  # Velocity in lattice units.
nulb    = uLB*r/Re;             # Viscoscity in lattice units.
omega = 1 / (3*nulb+0.5);    # Relaxation parameter.
save_figures = False
profile = True

## Lattice constants

In [4]:
v = cuda.to_device(np.array([ [ 1,  1], [ 1,  0], [ 1, -1], [ 0,  1], [ 0,  0],
               [ 0, -1], [-1,  1], [-1,  0], [-1, -1] ], dtype=np.int32)) # 9 vecteurs : 9 directions de déplacement
v_np = np.empty(shape=v.shape, dtype=v.dtype)
v.copy_to_host(v_np)

t = cuda.to_device(np.array([ 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36], 
                            dtype=np.float32))
t_np = np.empty(shape=t.shape, dtype=t.dtype)
t.copy_to_host(t_np)

col1 = np.array([0, 1, 2])
col2 = np.array([3, 4, 5])
col3 = np.array([6, 7, 8])

# Code main functions

### Main loop

In [13]:
def oneLoop(obstacle, vel, v, t, fin, rho, u, feq, fout):
    rightwall_cuda[rig_blockspergrid, rig_threadsperblock](fin)
    
    macroscopic_cuda[mac_blockspergrid, mac_threadsperblock](fin, v, rho, u) 

    leftwall_cuda[lef_blockspergrid, lef_threadsperblock](fin, vel, u, rho)

    equilibrium_cuda[equ_blockspergrid, equ_threadsperblock](rho, u, v, t, feq) 
    
    fin_inflow_cuda[inf_blockspergrid, inf_threadsperblock](feq, fin)
    
    collision_cuda[col_blockspergrid, col_threadsperblock](fin, feq, fout)
    
    bounceback_cuda[bou_blockspergrid, bou_threadsperblock](fin, obstacle, fout)
    
    streaming_cuda[str_blockspergrid, str_threadsperblock](fout, v, fin)
        

In [14]:
def main():
    # create obstacle mask array from element-wise function
    obstacle_device = cuda.to_device(np.fromfunction(obstacle_fun, (nx,ny), dtype=np.float32))
    
    # initial velocity field vx,vy from element-wise function
    # vel is also used for inflow border condition
    vel = np.fromfunction(inivel, (2,nx,ny), dtype=np.float32) 
    vel_device = cuda.to_device(vel)
    
    # Initialization of the populations at equilibrium 
    # with the given velocity.
    fin_device = cuda.to_device(equilibrium(1, vel).astype(np.float32))
    
    rho_device = cuda.to_device(np.zeros(shape=(fin_device.shape[1], fin_device.shape[2]), dtype=np.float32))
    
    u_device = cuda.to_device(np.zeros((2, nx, ny), dtype=np.float32))
    
    feq_device = cuda.to_device(np.zeros_like(fin_device, dtype=np.float32))
    
    fout_device = cuda.to_device(np.zeros_like(fin_device, dtype=np.float32))
    

    for time in range(maxIter):
        oneLoop(obstacle_device, vel_device, v, t, fin_device,
                     rho_device, u_device, feq_device, fout_device)
        

def main_profile():
    # create obstacle mask array from element-wise function
    obstacle = np.fromfunction(obstacle_fun, (nx,ny), dtype=np.float32)
    obstacle_device = cuda.to_device(obstacle)
    
    # initial velocity field vx,vy from element-wise function
    # vel is also used for inflow border condition
    vel = np.fromfunction(inivel, (2,nx,ny), dtype=np.float32)
    vel_device = cuda.to_device(vel)
    
    # Initialization of the populations at equilibrium 
    # with the given velocity.
    fin = equilibrium(1, vel).astype(np.float32)
    fin_device = cuda.to_device(fin)
    
    rho = np.zeros(shape=(fin_device.shape[1], fin_device.shape[2]), dtype=np.float32)
    rho_device = cuda.to_device(rho)
    
    u = np.zeros((2, nx, ny), dtype=np.float32)
    u_device = cuda.to_device(u)
    
    feq = np.zeros_like(fin, dtype=np.float32)
    feq_device = cuda.to_device(feq)
    
    fout = np.zeros_like(fin, dtype=np.float32)
    fout_device = cuda.to_device(fout)
    
    ###### Main time loop ########
    for time in range(maxIter):
        # Right wall: outflow condition.
        # we only need here to specify distrib. function for velocities
        # that enter the domain (other that go out, are set by the streaming step)
        
        timers.get("rightwall").start()
        rightwall_cuda[rig_blockspergrid, rig_threadsperblock](fin_device)
        timers.get("rightwall").end()

        
        # Compute macroscopic variables, density and velocity.
        timers.get("macroscopic").start()
        macroscopic_cuda[mac_blockspergrid, mac_threadsperblock](fin_device, v, rho_device, u_device) # Timer in func
        #rho, u = macroscopic(fin)
        timers.get("macroscopic").end()
        
        # Left wall: inflow condition.
        timers.get("leftwall").start()
        ########################## NE FONCTIONNE PAS
        #u[:,0,:] = vel[:,0,:]                                             # A remettre pour exécution sur CPU
        #rho[0,:] = 1/(1-u[0,0,:]) * ( np.sum(fin[col2,0,:], axis=0) +     # A remettre pour exécution sur CPU
        #                                2*np.sum(fin[col3,0,:], axis=0) ) # A remettre pour exécution sur CPU
        leftwall_cuda[lef_blockspergrid, lef_threadsperblock](fin_device, vel_device, u_device, rho_device)
        timers.get("leftwall").end()
        

        
        # Compute equilibrium.
        timers.get("equilibrium").start()
        equilibrium_cuda[equ_blockspergrid, equ_threadsperblock](rho_device, u_device, v, t, feq_device) # Timer in func
        #feq = equilibrium(rho, u)
        timers.get("equilibrium").end()
    

    
    
        timers.get("fin_inflow").start()
        fin_inflow_cuda[inf_blockspergrid, inf_threadsperblock](feq_device, fin_device)
        #fin[[0,1,2],0,:] = feq[[0,1,2],0,:] + fin[[8,7,6],0,:] - feq[[8,7,6],0,:]
        timers.get("fin_inflow").end()


        # Collision step.
        timers.get("collision").start()
        #fout = fin - omega * (fin - feq) # Noyau de calcul 1
        collision_cuda[col_blockspergrid, col_threadsperblock](fin_device, feq_device, fout_device)
        timers.get("collision").end()

    
        
        # Bounce-back condition for obstacle.
        # in python language, we "slice" fout by obstacle
        timers.get("bounceback").start()
        bounceback_cuda[bou_blockspergrid, bou_threadsperblock](fin_device, obstacle_device, fout_device)
        #for i in range(9):
        #    fout[i, obstacle] = fin[8-i, obstacle]
        timers.get("bounceback").end()


        # Streaming step.
        timers.get("streaming").start()
        #for i in range(9):
        #    fin[i,:,:] = np.roll(np.roll(fout[i,:,:], v_np[i,0], axis=0),
        #                         v_np[i,1], axis=1 ) # Noyau de calcul 2
        streaming_cuda[str_blockspergrid, str_threadsperblock](fout_device, v, fin_device)
        timers.get("streaming").end()


        
        if ((time%100==0) and save_figures):
            plt.clf()
            u_device.copy_to_host(u)
            plt.imshow(np.sqrt(u[0]**2+u[1]**2).transpose(), cmap=cm.Reds)
            plt.show()
            #plt.savefig("figures/vel.{0:04d}.png".format(time//100))


In [15]:
if profile:
    timers.get("main").start()
    main_profile()
    timers.get("main").end()
else:
    timers.get("main").start()
    main()
    timers.get("main").end()

# Warnings ignorés pour l'instant, à régler à la fin

In [16]:
total = np.sum(timers.get("main").getMeasures())
print(f"Total time : {total:4.2f}s")
timers.printInfo()
timers.printBd(nx, ny, 4)
timers.printGflops(nx, ny)

Total time : 16.31s
--> Timer 'main         ' : N =    1 | Mean 1.631e+01 +- 0.e+00     | 100.0% of total time.
--> Timer 'equilibrium  ' : N = 2000 | Mean 2.061e-03 +- 1.845e-03  | 25.27% of total time.
--> Timer 'collision    ' : N = 2000 | Mean 1.599e-03 +- 1.317e-03  |  19.6% of total time.
--> Timer 'streaming    ' : N = 2000 | Mean 9.229e-04 +- 1.792e-03  | 11.32% of total time.
--> Timer 'macroscopic  ' : N = 2000 | Mean 2.016e-03 +- 1.826e-03  | 24.71% of total time.
--> Timer 'rightwall    ' : N = 2000 | Mean 1.845e-04 +- 5.689e-03  |  2.26% of total time.
--> Timer 'leftwall     ' : N = 2000 | Mean 1.279e-04 +- 1.671e-03  |  1.57% of total time.
--> Timer 'fin_inflow   ' : N = 2000 | Mean 2.639e-04 +- 3.291e-03  |  3.24% of total time.
--> Timer 'bounceback   ' : N = 2000 | Mean 1.346e-04 +- 1.226e-03  |  1.65% of total time.
--> Timer 'move_gpu->cpu' : N =    0
--> Timer 'move_cpu->gpu' : N =    0
--> Remaining 1.693e+00s not monitored represent 10.38% of total time
mem band

## Tests