In [2]:
import warp as wp

I = wp.mat33(
    1.0, 0.0, 0.0,
    0.0, 1.0, 0.0,
    0.0, 0.0, 1.0,
)

print(I)


a = wp.array([1.0, 2.0, 3.0], dtype=wp.float32)
b = wp.array([10.0, 20.0, 30.0], dtype=wp.float32)
c = wp.array([10.0, 20.0, 30.0], dtype=wp.float32)


[[1.0, 0.0, 0.0],
 [0.0, 1.0, 0.0],
 [0.0, 0.0, 1.0]]
Warp 1.10.0.dev20250927 initialized:
   Git commit: 1049c9e9a6c5d8a5c001cfa93e1ebe9eabe96e5b
   CUDA driver not found or failed to initialize
   Devices:
     "cpu"      : "Intel64 Family 6 Model 186 Stepping 2, GenuineIntel"
   Kernel cache:
     \\?\C:\Users\baiix\AppData\Local\NVIDIA\warp\Cache\1.10.0.dev20250927


In [3]:
from warp.sparse import bsr_from_triplets
from warp.optim.linear import cg
from warp.sparse import bsr_zeros, bsr_identity, bsr_diag, bsr_copy
from warp.sparse import BsrMatrix


# Create a 4x4 sparse matrix with 2x2 blocks
rows = wp.array([0, 1, 2], dtype=int)  # Row indices
cols = wp.array([1, 2, 3], dtype=int)  # Column indices
vals = wp.array([1.0, 2.0, 3.0], dtype=float)    # Block values

# Create BSR matrix
A = bsr_from_triplets(
    rows_of_blocks=2,      # Number of rows of blocks
    cols_of_blocks=2,      # Number of columns of blocks
    rows=rows,            # Row indices
    columns=cols,         # Column indices
    values=vals           # Block values
)

print(A)

Module warp.sparse 1ba7a46 load on device 'cpu' took 1.43 ms  (cached)
BsrMatrix_float32(
	nrow=2,
	ncol=2,
	nnz=3,
	offsets=[0 1 1],
	columns=[      1 6029427 7733358],
	values=[1.00e+00 8.83e-43 1.26e-44],
)


In [4]:
A = bsr_zeros(rows_of_blocks=4, cols_of_blocks=4, block_type=wp.mat33)

print(A)

BsrMatrix_float32_3_3(
	nrow=4,
	ncol=4,
	nnz=0,
	offsets=[0 0 0 0 0],
	columns=[],
	values=[],
)


In [5]:
I = bsr_identity(rows_of_blocks=10, block_type=wp.float32)

print(I)

BsrMatrix_float32(
	nrow=10,
	ncol=10,
	nnz=10,
	offsets=[ 0  1  2  3  4  5  6  7  8  9 10],
	columns=[0 1 2 3 4 5 6 7 8 9],
	values=[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.],
)


In [6]:
# 3x3 blocks
A = bsr_zeros(2, 2, block_type=wp.mat33)

print(A)

BsrMatrix_float32_3_3(
	nrow=2,
	ncol=2,
	nnz=0,
	offsets=[0 0 0],
	columns=[],
	values=[],
)


In [7]:
# Valid conversions:
A = bsr_zeros(4, 4, block_type=wp.mat22)  # 8x8 matrix with 2x2 blocks
B = bsr_copy(A, block_shape=(1, 1))       # 8x8 matrix with 1x1 blocks
C = bsr_copy(A, block_shape=(4, 4))       # 8x8 matrix with 4x4 blocks

print(A)
print(B)
print(C)



BsrMatrix_float32_2_2(
	nrow=4,
	ncol=4,
	nnz=0,
	offsets=[0 0 0 0 0],
	columns=[],
	values=[],
)
BsrMatrix_float32(
	nrow=8,
	ncol=8,
	nnz=0,
	offsets=[0 0 0 0 0 0 0 0 0],
	columns=[],
	values=[],
)
BsrMatrix_float32_4_4(
	nrow=2,
	ncol=2,
	nnz=0,
	offsets=[0 0 0 0 0],
	columns=[],
	values=[],
)


# 1.对角矩阵

In [8]:
rows = wp.array([0, 1, 2], dtype=int)
cols = wp.array([0, 1, 2], dtype=int)
vals = wp.array([10.0, 20.0, 30.0], dtype=float)

A = bsr_from_triplets(
    rows_of_blocks=3,
    cols_of_blocks=3,
    rows=rows,
    columns=cols,
    values=vals
)
print(A)


# Get row indices from compressed format
nnz = A.nnz_sync()
rows = A.uncompress_rows()  # Returns array of row indices for each block

# Get column indices and values
cols = A.columns           # Column indices for each block
vals = A.values            # Block values

print(nnz)
print(rows)
print(cols)
print(vals)
# Now you have the COO format:
# - rows[i] is the row index of block i
# - cols[i] is the column index of block i
# - vals[i] is the value of block i

BsrMatrix_float32(
	nrow=3,
	ncol=3,
	nnz=3,
	offsets=[0 1 2 3],
	columns=[0 1 2],
	values=[10. 20. 30.],
)
3
[0 1 2]
[0 1 2]
[10. 20. 30.]


In [9]:
import warp as wp
import numpy as np

def warp_coo_deduplicate(rows, cols, vals, reduce="sum"):
    """
    合并重复 (row, col) 对，返回去重后的 rows, cols, vals
    直接处理 warp.array，避免不必要的转换
    
    Args:
        rows: wp.array，行索引
        cols: wp.array，列索引  
        vals: wp.array，值
        reduce: str, 聚合方式 "sum" | "mean" | "first"
        
    Returns:
        tuple: (out_rows, out_cols, out_vals) 去重后的 warp 数组
    """
    # 获取 numpy 视图进行计算（避免复制）
    rows_np = rows.numpy()
    cols_np = cols.numpy()
    vals_np = vals.numpy()
    
    # 合并成单一索引，用于唯一化
    max_col = np.max(cols_np) if len(cols_np) > 0 else 0
    idx = rows_np * (max_col + 1) + cols_np
    
    # 找到唯一索引及反向映射
    unique_idx, inv = np.unique(idx, return_inverse=True)
    
    n_unique = len(unique_idx)
    out_rows_np = unique_idx // (max_col + 1)
    out_cols_np = unique_idx % (max_col + 1)
    
    # 聚合操作
    if reduce == "sum":
        out_vals_np = np.zeros(n_unique, dtype=vals_np.dtype)
        np.add.at(out_vals_np, inv, vals_np)
    elif reduce == "mean":
        out_vals_np = np.zeros(n_unique, dtype=vals_np.dtype)
        counts = np.zeros(n_unique, dtype=int)
        np.add.at(out_vals_np, inv, vals_np)
        np.add.at(counts, inv, 1)
        out_vals_np = out_vals_np / counts
    elif reduce == "first":
        # 保留第一个出现的值
        _, first_indices = np.unique(idx, return_index=True)
        out_vals_np = vals_np[first_indices]
    else:
        raise ValueError("reduce must be one of ['sum', 'mean', 'first']")
    
    # 直接创建 warp 数组返回
    out_rows = wp.array(out_rows_np, dtype=int)
    out_cols = wp.array(out_cols_np, dtype=int)
    out_vals = wp.array(out_vals_np, dtype=vals.dtype)
    
    return out_rows, out_cols, out_vals

In [10]:
def warp_coo_deduplicate(rows, cols, vals):
    """
    去重 COO 格式，vals 为 3x3 矩阵块，只做 sum 聚合
    """
    rows_np = rows.numpy()
    cols_np = cols.numpy()
    vals_np = vals.numpy()  # shape (nnz, 3, 3)
    
    max_col = np.max(cols_np) if len(cols_np) > 0 else 0
    idx = rows_np * (max_col + 1) + cols_np
    
    unique_idx, inv = np.unique(idx, return_inverse=True)
    n_unique = len(unique_idx)
    
    out_rows_np = unique_idx // (max_col + 1)
    out_cols_np = unique_idx % (max_col + 1)
    
    # 向量化累加 3x3 块
    vals_flat = vals_np.reshape(vals_np.shape[0], -1)   # (nnz, 9)
    out_vals_flat = np.zeros((n_unique, 9), dtype=vals_np.dtype)
    np.add.at(out_vals_flat, inv, vals_flat)
    out_vals_np = out_vals_flat.reshape(n_unique, 3, 3)
    
    return (
        wp.array(out_rows_np, dtype=int),
        wp.array(out_cols_np, dtype=int),
        wp.array(out_vals_np, dtype=vals.dtype)
    )


# 2.重复索引

In [15]:
rows = []
cols = []
vals = []

A = bsr_from_triplets(
    rows_of_blocks=3,
    cols_of_blocks=3,
    rows=rows,
    columns=cols,
    values=vals
)
print(A)

AttributeError: 'list' object has no attribute 'ndim'

In [14]:
rows = wp.array([0, 0, 1], dtype=int)
cols = wp.array([1, 1, 2], dtype=int)
vals = wp.array([2.0, 3.0, 4.0], dtype=float)

A = bsr_from_triplets(
    rows_of_blocks=3,
    cols_of_blocks=3,
    rows=rows,
    columns=cols,
    values=vals
)
print(A)

BsrMatrix_float32(
	nrow=3,
	ncol=3,
	nnz=3,
	offsets=[0 1 2 2],
	columns=[1 2 0],
	values=[5.0000000e+00 4.0000000e+00 2.6855396e-06],
)


In [11]:
rows = wp.array([0, 0, 1], dtype=int)
cols = wp.array([1, 1, 2], dtype=int)
vals = wp.array([2.0, 3.0, 4.0], dtype=float)

A = bsr_from_triplets(
    rows_of_blocks=2,
    cols_of_blocks=3,
    rows=rows,
    columns=cols,
    values=vals
)
print(A)

# 去重

# Get row indices from compressed format
nnz = A.nnz_sync()
rows1 = A.uncompress_rows()  # Returns array of row indices for each block

# Get column indices and values
cols1 = A.columns           # Column indices for each block
vals1 = A.values            # Block values

print(nnz)
print(rows1)
print(cols1)
print(vals1)



# Deduplicate COO format
rows2, cols2, vals2 = warp_coo_deduplicate(rows, cols, vals)

A = bsr_from_triplets(
    rows_of_blocks=2,
    cols_of_blocks=3,
    rows=rows2,
    columns=cols2,
    values=vals2
)
print(A)

# Get row indices from compressed format
nnz = A.nnz_sync()
rows1 = A.uncompress_rows()  # Returns array of row indices for each block

# Get column indices and values
cols1 = A.columns           # Column indices for each block
vals1 = A.values            # Block values

print(nnz)
print(rows1)
print(cols1)
print(vals1)

# Now you have the COO format:
# - rows[i] is the row index of block i
# - cols[i] is the column index of block i
# - vals[i] is the value of block i


BsrMatrix_float32(
	nrow=2,
	ncol=3,
	nnz=3,
	offsets=[0 1 2],
	columns=[1 2 0],
	values=[5. 4. 0.],
)
2
[0 1]
[1 2 0]
[5. 4. 0.]
BsrMatrix_float32_3_3(
	nrow=2,
	ncol=3,
	nnz=2,
	offsets=[0 1 2],
	columns=[1 2],
	values=[[[5. 5. 5.]
  [5. 5. 5.]
  [5. 5. 5.]]

 [[4. 4. 4.]
  [4. 4. 4.]
  [4. 4. 4.]]],
)
2
[0 1]
[1 2]
[[[5. 5. 5.]
  [5. 5. 5.]
  [5. 5. 5.]]

 [[4. 4. 4.]
  [4. 4. 4.]
  [4. 4. 4.]]]


# 3.mat33

In [12]:
# 定义 block 的行列索引（块坐标）
rows = wp.array([0], dtype=int)
cols = wp.array([0], dtype=int)

# 每个块是一个 3x3 矩阵 (wp.mat33)
vals = wp.array([
    wp.mat33(  # block (0,0)
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0
    )
], dtype=wp.mat33)

# 构建 BSR 矩阵（块为3x3）
A = bsr_from_triplets(
    rows_of_blocks=3,   # block 行数
    cols_of_blocks=3,   # block 列数
    rows=rows,          # block 行索引
    columns=cols,       # block 列索引
    values=vals         # 每个块的值 (mat33)
)

print(A)

# Get row indices from compressed format
nnz = A.nnz_sync()
rows = A.uncompress_rows()  # Returns array of row indices for each block

# Get column indices and values
cols = A.columns           # Column indices for each block
vals = A.values            # Block values

print(nnz)
print(rows)
print(cols)
print(vals)



# Now you have the COO format:
# - rows[i] is the row index of block i
# - cols[i] is the column index of block i
# - vals[i] is the value of block i

BsrMatrix_float32_3_3(
	nrow=3,
	ncol=3,
	nnz=1,
	offsets=[0 1 1 1],
	columns=[0],
	values=[[[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]],
)
1
[0]
[0]
[[[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]]
