<a href="https://colab.research.google.com/github/bartonzzx/MLC/blob/main/2_MCL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!python3 -m  pip install mlc-ai-nightly -f https://mlc.ai/wheels

Looking in links: https://mlc.ai/wheels
Collecting mlc-ai-nightly
  Downloading https://github.com/mlc-ai/package/releases/download/v0.9.dev0/mlc_ai_nightly-0.15.dev570-cp312-cp312-manylinux_2_28_x86_64.whl (185.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m185.6/185.6 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mlc-ai-nightly
Successfully installed mlc-ai-nightly-0.15.dev570


In [4]:
import IPython
import numpy as np
import tvm
from tvm.ir.module import IRModule
from tvm.script import tir as T

## 如何编写TensorIR

In [5]:
# numpy data initialization
# dtype is pinned to int64: the TensorIR buffers in this notebook are declared
# "int64", and NumPy's default integer width is platform dependent.
a = np.arange(16, dtype=np.int64).reshape(4, 4)
b = np.arange(16, 0, -1, dtype=np.int64).reshape(4, 4)
a, b

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]]),
 array([[16, 15, 14, 13],
        [12, 11, 10,  9],
        [ 8,  7,  6,  5],
        [ 4,  3,  2,  1]]))

In [6]:
# numpy add: element-wise sum computed by NumPy, used as the reference result
c_np = np.add(a, b)
c_np

array([[16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16]])

In [7]:
# low level numpy add
def lnumpy_add(a: np.ndarray, b: np.ndarray, c: np.ndarray):
    """Store the element-wise sum of ``a`` and ``b`` into ``c``, one element at a time.

    Only indices ``[0..3, 0..3]`` are visited. ``c`` is written in place and
    nothing is returned.
    """
    # np.ndindex(4, 4) walks (0, 0), (0, 1), ..., (3, 3) in row-major order.
    for row, col in np.ndindex(4, 4):
        c[row, col] = a[row, col] + b[row, col]

# Run the explicit-loop version into a preallocated output buffer and verify it
# against the NumPy reference instead of only eyeballing the printed array.
c_lnp = np.empty((4, 4), dtype = np.int64)
lnumpy_add(a, b, c_lnp)
np.testing.assert_array_equal(c_lnp, c_np)
c_lnp

array([[16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16]])

In [8]:
# TensorIR add
@tvm.script.ir_module
class MyAdd:
    @T.prim_func
    def add(
        A: T.Buffer((4, 4), "int64"),
        B: T.Buffer((4, 4), "int64"),
        C: T.Buffer((4, 4), "int64"),
    ):
        # Computes C[i, j] = A[i, j] + B[i, j] over the 4x4 iteration space.
        T.func_attr({"global_symbol": "add"})
        for i, j in T.grid(4, 4):
            with T.block("C"):
                # Bind both loop variables to spatial block axes in one step.
                vi, vj = T.axis.remap("SS", [i, j])
                C[vi, vj] = A[vi, vj] + B[vi, vj]


# Build with the LLVM target, then call the kernel on TVM arrays created from
# the NumPy inputs.
rt_lib = tvm.build(MyAdd, target="llvm")
a_tvm, b_tvm = tvm.nd.array(a), tvm.nd.array(b)
c_tvm = tvm.nd.array(np.empty((4, 4), dtype=np.int64))
rt_lib["add"](a_tvm, b_tvm, c_tvm)
# The TensorIR output must agree with the NumPy reference result.
np.testing.assert_allclose(c_tvm.numpy(), c_np, rtol=1e-5)

# 练习1:广播加法

In [9]:
# init data: a 4x4 matrix and a length-4 vector that is broadcast across its rows.
# dtype is pinned to int64 to match the "int64" TensorIR buffers; NumPy's
# default integer width is platform dependent.
a = np.arange(16, dtype=np.int64).reshape(4, 4)
b = np.arange(4, 0, -1, dtype=np.int64)
a, b

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]]),
 array([4, 3, 2, 1]))

In [10]:
# numpy version: the length-4 vector b is broadcast across every row of a
c_np = np.add(a, b)
c_np

array([[ 4,  4,  4,  4],
       [ 8,  8,  8,  8],
       [12, 12, 12, 12],
       [16, 16, 16, 16]])

In [11]:
# TensorIR broadcast add: C[i, j] = A[i, j] + B[j]
@tvm.script.ir_module
class MyAdd:
    @T.prim_func
    def add(
            A: T.Buffer((4, 4), "int64"),
            # Shape must be a 1-tuple: `(4)` is just the integer 4.
            B: T.Buffer((4,), "int64"),
            C: T.Buffer((4, 4), "int64")
    ):
        T.func_attr({"global_symbol": "add"})
        for i, j in T.grid(4, 4):
            with T.block("C"):
                vi = T.axis.spatial(4, i)
                vj = T.axis.spatial(4, j)
                # B is indexed by the column axis only, so the same vector is
                # added to every row of A.
                C[vi, vj] = A[vi, vj] + B[vj]

# Build with the LLVM target and run the kernel on TVM arrays created from the
# NumPy inputs.
rt_lib = tvm.build(MyAdd, target = "llvm")
a_tvm = tvm.nd.array(a)
b_tvm = tvm.nd.array(b)
c_tvm = tvm.nd.array(np.empty((4, 4), dtype = np.int64))
rt_lib["add"](a_tvm, b_tvm, c_tvm)
# The TensorIR output must agree with NumPy's broadcast result.
np.testing.assert_allclose(c_tvm.numpy(), c_np, rtol = 1e-5)

# 练习2:二维卷积

In [21]:
# Problem size. These names are used below as the input shape (N, CI, H, W)
# and the weight shape (CO, CI, K, K).
N, CI, H, W, CO, K = 1, 1, 8, 8, 2, 3
# Output spatial size: each dimension shrinks by K - 1.
OUT_H, OUT_W = H - K + 1, W - K + 1
# dtype is pinned to int64 to match the "int64" TensorIR buffers; NumPy's
# default integer width is platform dependent.
data = np.arange(N * CI * H * W, dtype=np.int64).reshape(N, CI, H, W)
weight = np.arange(CO * CI * K * K, dtype=np.int64).reshape(CO, CI, K, K)
data, weight

(array([[[[ 0,  1,  2,  3,  4,  5,  6,  7],
          [ 8,  9, 10, 11, 12, 13, 14, 15],
          [16, 17, 18, 19, 20, 21, 22, 23],
          [24, 25, 26, 27, 28, 29, 30, 31],
          [32, 33, 34, 35, 36, 37, 38, 39],
          [40, 41, 42, 43, 44, 45, 46, 47],
          [48, 49, 50, 51, 52, 53, 54, 55],
          [56, 57, 58, 59, 60, 61, 62, 63]]]]),
 array([[[[ 0,  1,  2],
          [ 3,  4,  5],
          [ 6,  7,  8]]],
 
 
        [[[ 9, 10, 11],
          [12, 13, 14],
          [15, 16, 17]]]]))

In [22]:
# torch version: reference convolution used to validate the TensorIR kernel
import torch

# Compute in float64 so every integer involved is represented exactly; the
# legacy `torch.Tensor(...)` constructor would silently cast to float32.
data_torch = torch.from_numpy(data).to(torch.float64)
weight_torch = torch.from_numpy(weight).to(torch.float64)
conv_torch = torch.nn.functional.conv2d(data_torch, weight_torch)
# Round before converting back to integers: a plain integer cast truncates
# toward zero, which would turn e.g. 473.99999 into 473.
conv_torch = conv_torch.round().to(torch.int64).numpy()
conv_torch

array([[[[ 474,  510,  546,  582,  618,  654],
         [ 762,  798,  834,  870,  906,  942],
         [1050, 1086, 1122, 1158, 1194, 1230],
         [1338, 1374, 1410, 1446, 1482, 1518],
         [1626, 1662, 1698, 1734, 1770, 1806],
         [1914, 1950, 1986, 2022, 2058, 2094]],

        [[1203, 1320, 1437, 1554, 1671, 1788],
         [2139, 2256, 2373, 2490, 2607, 2724],
         [3075, 3192, 3309, 3426, 3543, 3660],
         [4011, 4128, 4245, 4362, 4479, 4596],
         [4947, 5064, 5181, 5298, 5415, 5532],
         [5883, 6000, 6117, 6234, 6351, 6468]]]])

In [30]:
# TensorIR 2-D convolution:
#   Conv[n, co, oh, ow] = sum over (kh, kw, ci) of
#                         Data[n, ci, oh + kh, ow + kw] * Weight[co, ci, kh, kw]
@tvm.script.ir_module
class MyConv:
    @T.prim_func
    def conv(
            Data: T.Buffer((N, CI, H, W), "int64"),
            Weight: T.Buffer((CO, CI, K, K), "int64"),
            Conv: T.Buffer((N, CO, OUT_H, OUT_W), "int64")
    ):
        T.func_attr({"global_symbol": "conv", "tir.noalias": True})
        for n, co, oh, ow, kh, kw, ci in T.grid(N, CO, OUT_H, OUT_W, K, K, CI):
            with T.block("Conv"):
                # The four output coordinates are spatial axes ("S"); the kernel
                # offsets and the input channel are reduction axes ("R") because
                # every one of their values accumulates into the same output
                # element.
                vn, vco, voh, vow, vkh, vkw, vci = T.axis.remap(
                    "SSSSRRR", [n, co, oh, ow, kh, kw, ci]
                )
                # Runs once per output element, before its first accumulation.
                with T.init():
                    Conv[vn, vco, voh, vow] = T.int64(0)
                Conv[vn, vco, voh, vow] = (
                    Conv[vn, vco, voh, vow]
                    + Data[vn, vci, voh + vkh, vow + vkw] * Weight[vco, vci, vkh, vkw]
                )

# Build with the LLVM target and run the kernel on TVM arrays created from the
# NumPy inputs.
rt_lib = tvm.build(MyConv, target="llvm")
data_tvm = tvm.nd.array(data)
weight_tvm = tvm.nd.array(weight)
conv_tvm = tvm.nd.array(np.empty((N, CO, OUT_H, OUT_W), dtype=np.int64))
rt_lib["conv"](data_tvm, weight_tvm, conv_tvm)
# Validate against the torch reference so a wrong kernel fails loudly.
np.testing.assert_allclose(conv_tvm.numpy(), conv_torch, rtol=1e-5)