In [1]:
import ast
import textwrap
from inspect import getsource
from typing import Callable

from dusk.grammar import Grammar

from dawn4py import compile, CodeGenBackend
from dawn4py.serialization import make_sir, SIR
from dawn4py.serialization.SIR import GridType
from dawn4py._dawn4py import run_optimizer_sir


def dusk_to_sir(stencil: Callable) -> SIR:
    """Translate a dusk stencil function into a dawn SIR object.

    The Python source of ``stencil`` is fetched with ``inspect.getsource``,
    parsed into an AST, checked with ``Grammar.is_stencil`` and then handed to
    ``make_sir`` together with the function's name and the "Unstructured"
    grid type.

    Args:
        stencil: The Python function that holds the stencil definition. Its
            source code must be retrievable by ``inspect.getsource``.

    Returns:
        The SIR built by ``make_sir`` for this single stencil.

    Raises:
        AssertionError: If the parsed source is not a module with exactly one
            top-level statement, or if that statement is rejected by
            ``Grammar.is_stencil``.
    """
    # TODO: this will give wrong line numbers, there should be a way to fix them
    name = stencil.__name__

    # `getsource` keeps the original indentation, so a stencil defined inside
    # another function or a class body would make `ast.parse` fail with an
    # IndentationError. Dedenting is a no-op for top-level definitions.
    source = textwrap.dedent(getsource(stencil))
    module = ast.parse(source)

    assert isinstance(module, ast.Module), "expected the stencil source to parse to a module"
    assert len(module.body) == 1, "expected exactly one top-level definition in the stencil source"
    stencil_def = module.body[0]
    assert Grammar.is_stencil(stencil_def), "the given function is not a valid dusk stencil"

    return make_sir(
        name, GridType.Value("Unstructured"), [Grammar().stencil(stencil_def)]
    )

In [2]:
from dusk.script import *


# Stencil computing `nabla2_vec` on edges from the edge field `vec`, as the
# difference between a gradient-of-divergence term and a gradient-of-curl term.
# `div_vec` and `rot_vec` are written first and then read by later statements;
# all remaining parameters are only read.
# NOTE(review): documented with `#` comments rather than a docstring because
# this function's source is re-parsed by dusk's Grammar (see `dusk_to_sir`);
# confirm whether the grammar accepts a docstring before adding one.
@stencil
def laplacian_fvm(
    vec: Field[Edge, K],
    div_vec: Field[Cell, K],
    rot_vec: Field[Vertex, K],
    nabla2_vec: Field[Edge, K],
    primal_edge_length: Field[Edge],
    dual_edge_length: Field[Edge],
    tangent_orientation: Field[Edge],
    geofac_rot: Field[Vertex > Edge],
    geofac_div: Field[Cell > Edge],
) -> None:

    # intermediate edge fields holding the two terms of nabla2
    nabla2t1_vec: Field[Edge, K]
    nabla2t2_vec: Field[Edge, K]

    # presumably iterates over the vertical (K) levels from bottom to top -- TODO confirm
    with levels_upward:

        # compute curl (on vertices)
        # presumably `Vertex > Edge` sums over the edges adjacent to each vertex -- TODO confirm
        rot_vec = sum_over(Vertex > Edge, vec * geofac_rot)

        # compute divergence (on cells)
        div_vec = sum_over(Cell > Edge, vec * geofac_div)

        # first term of nabla2 (gradient of curl)
        # weights -1.0 and 1 turn the sum over the neighbouring vertices into a signed difference
        nabla2t1_vec = sum_over(Edge > Vertex, rot_vec, weights=[-1.0, 1])
        nabla2t1_vec = tangent_orientation * nabla2t1_vec / primal_edge_length

        # second term of nabla2 (gradient of divergence)
        # same signed-difference weighting, here over the neighbouring cells
        nabla2t2_vec = sum_over(Edge > Cell, div_vec, weights=[-1.0, 1])
        nabla2t2_vec = tangent_orientation * nabla2t2_vec / dual_edge_length

        # finalize nabla2 (difference between the two gradients)
        nabla2_vec = nabla2t2_vec - nabla2t1_vec


In [3]:
# Convert the stencil function into SIR; the following cells pass `sir` to
# `compile` once per code-generation backend.
sir = dusk_to_sir(laplacian_fvm)

In [4]:
# Generate code with the CXXNaiveIco backend, show it, and save it to disk.
# Note: `compile` is dawn4py's `compile` (imported in the first cell), which
# shadows the Python builtin of the same name.
cpp_naive = compile(sir, backend=CodeGenBackend.CXXNaiveIco)
print(cpp_naive)
# Write-only mode is enough here: the file is never read back through `f`.
with open("laplacian_fvm_naive.cpp", "w") as f:
    f.write(cpp_naive)

#define DAWN_GENERATED 1
#undef DAWN_BACKEND_T
#define DAWN_BACKEND_T CXXNAIVEICO
#include <driver-includes/unstructured_interface.hpp>
#include <driver-includes/unstructured_domain.hpp>


namespace dawn_generated{
namespace cxxnaiveico{
template<typename LibTag>
class laplacian_fvm {
private:

  struct stencil_72 {
    ::dawn::mesh_t<LibTag> const& m_mesh;
    int m_k_size;
    ::dawn::edge_field_t<LibTag, ::dawn::float_type>& m_vec;
    ::dawn::cell_field_t<LibTag, ::dawn::float_type>& m_div_vec;
    ::dawn::vertex_field_t<LibTag, ::dawn::float_type>& m_rot_vec;
    ::dawn::edge_field_t<LibTag, ::dawn::float_type>& m_nabla2_vec;
    ::dawn::edge_field_t<LibTag, ::dawn::float_type>& m_primal_edge_length;
    ::dawn::edge_field_t<LibTag, ::dawn::float_type>& m_dual_edge_length;
    ::dawn::edge_field_t<LibTag, ::dawn::float_type>& m_tangent_orientation;
    ::dawn::sparse_vertex_field_t<LibTag, ::dawn::float_type>& m_geofac_rot;
    ::dawn::sparse_cell_field_t<LibTag, ::dawn::float_typ

In [5]:
# Generate code with the CUDAIco backend, show it, and save it to disk.
# Note: `compile` is dawn4py's `compile` (imported in the first cell), which
# shadows the Python builtin of the same name.
cpp_cuda = compile(sir, backend=CodeGenBackend.CUDAIco)
print(cpp_cuda)
# Write-only mode is enough here: the file is never read back through `f`.
with open("laplacian_fvm_cuda.cpp", "w") as f:
    f.write(cpp_cuda)

#ifdef DAWN_ENABLE_BINDGEN
#include <cpp_bindgen/export.hpp>
#endif /* DAWN_ENABLE_BINDGEN */
#include "driver-includes/unstructured_interface.hpp"
#include "driver-includes/unstructured_domain.hpp"
#include "driver-includes/defs.hpp"
#include "driver-includes/cuda_utils.hpp"
#include "driver-includes/math.hpp"
#include "driver-includes/timer_cuda.hpp"
#define BLOCK_SIZE 16
#define LEVELS_PER_THREAD 1
using namespace gridtools::dawn;


namespace dawn_generated{
namespace cuda_ico{
template<int V_E_SIZE>__global__ void laplacian_fvm_stencil243_ms286_s307_kernel(int NumVertices, int NumEdges, int kSize, const int *veTable, const ::dawn::float_type * __restrict__ vec, ::dawn::float_type * __restrict__ rot_vec, const ::dawn::float_type * __restrict__ geofac_rot) {
  unsigned int pidx = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int kidx = blockIdx.y * blockDim.y + threadIdx.y;
  int klo = kidx * LEVELS_PER_THREAD + 0;
  int khi = (kidx + 1) * LEVELS_PER_THREAD + 0;
if (pidx >= NumVe