diff --git a/.github/workflows/aipu-build-and-test.yml b/.github/workflows/aipu-build-and-test.yml index 9fd20ee20..1d0314fbe 100644 --- a/.github/workflows/aipu-build-and-test.yml +++ b/.github/workflows/aipu-build-and-test.yml @@ -52,7 +52,7 @@ jobs: source ~/env_setup.sh export FLAGTREE_BACKEND=aipu cd python - MAX_JOBS=64 python3.10 -m pip install . --no-build-isolation -v + MAX_JOBS=16 python3.10 -m pip install . --no-build-isolation -v - name: FlagTree Test on AIPU shell: bash diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 109c292fe..72181b98f 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -1 +1 @@ -add_subdirectory(triton) +add_subdirectory(triton) \ No newline at end of file diff --git a/include/flagtree/Common/UnifiedHardwareBase.h b/include/flagtree/Common/UnifiedHardwareBase.h new file mode 100644 index 000000000..03e8aba38 --- /dev/null +++ b/include/flagtree/Common/UnifiedHardwareBase.h @@ -0,0 +1,27 @@ +#ifndef UNIFIED_HARDWARE_BASE_H +#define UNIFIED_HARDWARE_BASE_H + +#include + +namespace mlir { +namespace flagtree { +//this is the unified hardware abstraction for hardware +//to determined if these abstraction is specified, using std::optional is needed +//using in passes: if(uh_flagtree->xxx()){...} + +class UnifiedHardware{ + +public: + virtual ~UnifiedHardware() = default; + + //DMA + virtual std::optional getAllocSpaceForDMATag() const { + return std::nullopt; + } + +}; + +} // namespace flagtree +} // namespace mlir + +#endif // UNIFIED_HARDWARE_BASE_H diff --git a/include/triton/Dialect/Triton/IR/TritonAttrDefs.td b/include/triton/Dialect/Triton/IR/TritonAttrDefs.td index 571d2b55b..898ca1559 100644 --- a/include/triton/Dialect/Triton/IR/TritonAttrDefs.td +++ b/include/triton/Dialect/Triton/IR/TritonAttrDefs.td @@ -4,6 +4,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/IR/AttrTypeBase.td" + // Attributes for LoadOp and StoreOp def TT_CacheModifierAttr : I32EnumAttr< "CacheModifier", "", diff --git a/python/src/ir.cc b/python/src/ir.cc index ee35ce834..c1f092ed7 100644 --- a/python/src/ir.cc +++ b/python/src/ir.cc @@ -674,6 +674,13 @@ void init_triton_ir(py::module &&m) { values)); }); + m.def("make_attr_i64", [](const std::vector &values, MLIRContext &context) { + return mlir::cast(DenseIntElementsAttr::get( + RankedTensorType::get({static_cast(values.size())}, + IntegerType::get(&context, 64)), + values)); + }); + m.def( "parse_mlir_module", [](const std::string &inputFilename, MLIRContext &context) { diff --git a/third_party/aipu/backend/compiler.py b/third_party/aipu/backend/compiler.py index 5a2c16b84..5596d21a4 100644 --- a/third_party/aipu/backend/compiler.py +++ b/third_party/aipu/backend/compiler.py @@ -7,6 +7,9 @@ from mlir.passmanager import PassManager from mlir.ir import Context, Module +import ctypes +from triton._C.libtriton.aipu import UnifiedHardwareAIPU + from dataclasses import dataclass import functools import hashlib @@ -96,6 +99,12 @@ def make_ttir(mod, metadata, opt): @staticmethod def make_linalg(mod, metadata, opt): + uh_aipu = UnifiedHardwareAIPU() + uh_aipu_ptr = ctypes.cast(id(uh_aipu), ctypes.c_void_p) + ptr_value = int(uh_aipu_ptr.value) + ptr_attr = ir.make_attr_i64([ptr_value], mod.context) + #mod.set_attr("uh_aipu_ptr", ptr_attr) # TODO + pm = ir.pass_manager(mod.context) pm.enable_debug() # Add pass here. diff --git a/third_party/aipu/triton_aipu.cc b/third_party/aipu/triton_aipu.cc index b617232df..47014e011 100644 --- a/third_party/aipu/triton_aipu.cc +++ b/third_party/aipu/triton_aipu.cc @@ -22,6 +22,8 @@ #include "triton-shared/Conversion/TritonToLinalg/TritonToLinalg.h" #include "triton-shared/Conversion/TritonToLinalgExperimental/TritonToLinalgExperimental.h" +#include "unified_hardware_aipu.h" + #include #include #include @@ -69,6 +71,7 @@ void init_triton_aipu(py::module &&m) { init_triton_aipu_common(m.def_submodule("common")); auto passes = m.def_submodule("passes"); init_triton_aipu_passes_convert(passes.def_submodule("convert")); + // load dialects m.def("load_dialects", [](mlir::MLIRContext &context) { using namespace mlir; @@ -88,4 +91,6 @@ void init_triton_aipu(py::module &&m) { context.loadAllAvailableDialects(); }); // register passes here + py::class_(m, "UnifiedHardwareAIPU") + .def(py::init<>()); // flagtree } diff --git a/third_party/aipu/unified_hardware_aipu.h b/third_party/aipu/unified_hardware_aipu.h new file mode 100644 index 000000000..f8f272ecc --- /dev/null +++ b/third_party/aipu/unified_hardware_aipu.h @@ -0,0 +1,22 @@ +#ifndef UNIFIED_HARDWARE_AIPU_H +#define UNIFIED_HARDWARE_AIPU_H + +#include + +#include "flagtree/Common/UnifiedHardwareBase.h" + +namespace mlir { +namespace aipu { + +class UnifiedHardwareAIPU final : public mlir::flagtree::UnifiedHardware { + + //DMA + std::optional getAllocSpaceForDMATag() const override{ + return std::optional(11); + } +}; + +} // namespace aipu +} // namespace mlir + +#endif // UNIFIED_HARDWARE_AIPU_H \ No newline at end of file