### Sum 

In [3]:
import numpy as np
import tvm
from tvm import te

In [4]:
# Random 3x4 float32 test matrix; no seed is set, so the values differ on every run.
a = np.random.normal(size=(3,4)).astype('float32')
# NumPy reference result: sum along axis 1, i.e. one value per row.
a.sum(axis=1)

array([-0.8399152,  1.7566912, -0.9235877], dtype=float32)

In [5]:
def sum_rows(a, b):
    """Write the sum of each row of ``a`` into the matching slot of ``b``.

    ``b`` is filled in place, one entry for every index in ``range(len(b))``;
    nothing is returned.
    """
    for row in range(len(b)):
        b[row] = np.sum(a[row, :])
        
# Allocate one output slot per row of `a` instead of hard-coding 3, so this
# cell keeps working if the shape of `a` is changed.
b = np.empty((a.shape[0],), dtype='float32')
sum_rows(a, b)  # fills `b` in place
b

array([-0.8399152,  1.7566912, -0.9235877], dtype=float32)

In [6]:
# Symbolic extents: the lowered loops are expressed in terms of n and m.
n = te.var('n')
m = te.var('m')
A = te.placeholder((n, m), name='a')
# Reduction axis j iterates over the range (0, m).
j = te.reduce_axis((0, m), name='j')
# b[i] is the sum of a[i, j] over the reduction axis j.
B = te.compute((n,), lambda i: te.sum(A[i, j], axis=j), name='b')
s = te.create_schedule(B.op)
# Last expression of the cell, so the lowered IR is displayed.
tvm.lower(s, [A, B], simple_mode=True)

#[version = "0.0.5"]
@main = primfn(a_1: handle, b_1: handle) -> ()
  attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
  buffers = {a: Buffer(a_2: Pointer(float32), float32, [(stride: int32*n: int32)], [], type="auto"),
             b: Buffer(b_2: Pointer(float32), float32, [(stride_1: int32*n)], [], type="auto")}
  buffer_map = {a_1: a, b_1: b}
  preflattened_buffer_map = {a_1: a_3: Buffer(a_2, float32, [n, m: int32], [stride, stride_2: int32], type="auto"), b_1: b_3: Buffer(b_2, float32, [n], [stride_1], type="auto")} {
  for (i: int32, 0, n) {
    b[(i*stride_1)] = 0f32
    for (j: int32, 0, m) {
      b[(i*stride_1)] = (b[(i*stride_1)] + a[((i*stride) + (j*stride_2))])
    }
  }
}

#[metadata]
{
  "root": 1, 
  "nodes": [
    {
      "type_key": ""
    }, 
    {
      "type_key": "Map", 
      "keys": [
        "IntImm"
      ], 
      "data": [2]
    }, 
    {
      "type_key": "Array", 
      "data": [3, 4]
    }, 
    {
      "type_key": "In

In [7]:
# Compile the schedule into a callable module.
mod = tvm.build(s, [A, B])
# Output buffer sized from `a` rather than a hard-coded row count.
c = tvm.nd.array(np.empty((a.shape[0],), dtype='float32'))
mod(tvm.nd.array(a), c)
# Compare with a tolerance: the compiled kernel and NumPy may accumulate the
# float32 sums in a different order, so bit-exact equality is too strict.
np.testing.assert_allclose(b, c.asnumpy(), atol=1e-5)

In [8]:
### reduce to 0-rank tensor
# Reduction axes over both dimensions of A. `j` is re-created here so this
# compute does not share an axis object with the earlier row-sum compute.
i = te.reduce_axis((0, n), name='i')
j = te.reduce_axis((0, m), name='j')
# Empty output shape: B is a 0-rank tensor holding the sum of every a[i, j].
B = te.compute((), lambda: te.sum(A[i, j], axis=(i, j)), name='b')
s = te.create_schedule(B.op)
# Not the last expression of the cell, so it has to be printed explicitly;
# a bare call here would silently discard the lowered IR.
print(tvm.lower(s, [A, B], simple_mode=True))

mod = tvm.build(s, [A, B])
c = tvm.nd.array(np.empty((), dtype='float32'))
mod(tvm.nd.array(a), c)
np.testing.assert_allclose(a.sum(), c.asnumpy(), atol=1e-5)

### Commutative Reduction 

In [9]:
def prod_rows(a, b):
    """Store the product of each row of ``a`` in ``b``.

    ``a`` is an n-by-m 2-D matrix and ``b`` is an n-length 1-D vector that is
    overwritten in place; nothing is returned.
    """
    rows, cols = a.shape
    for r in range(rows):
        # Start every row from 1, then fold in one element at a time.
        b[r] = 1
        for c in range(cols):
            b[r] = b[r] * a[r, c]

In [10]:
def comp(a, b):
    """Combine two values of the reduction by multiplying them."""
    return a * b


def init(dtype):
    """Return the constant 1 in the requested dtype."""
    return tvm.tir.const(1, dtype=dtype)


# Custom reducer built from the combiner `comp` and the constant-1 factory `init`.
product = te.comm_reducer(comp, init)

In [12]:
# Re-create the symbolic sizes and the input placeholder for the product example.
n = te.var('n')
m = te.var('m')
A = te.placeholder((n, m), name='a')
# Reduction axis k iterates over the range (0, m).
k = te.reduce_axis((0, m), name='k')
# b[i] reduces a[i, k] over k using the custom `product` reducer.
B = te.compute((n,), lambda i: product(A[i, k], axis=k), name='b')
s = te.create_schedule(B.op)
# The lowered result is stored in `low`, so this cell displays nothing.
low = tvm.lower(s, [A, B], simple_mode=True)

In [None]:
# Compile the current schedule `s` into a callable module.
mod = tvm.build(s, [A,B])
# NOTE(review): `b` is rebound here to a TVM NDArray; earlier cells use `b` for a NumPy array.
b = tvm.nd.array(np.empty((3,),dtype='float32'))
mod(tvm.nd.array(a), b)
# Check the compiled result against NumPy's row-wise product, within atol=1e-5.
np.testing.assert_allclose(a.prod(axis=1), b.asnumpy(), atol=1e-5)