Dummy version of the semiring mat-mul with a simpler matrix for testing

In [1]:
import numpy as np
from itertools import product
import time
import math

In [2]:
# def mat_hash(a,b):
#     return hash(a.tobytes()) ^ hash(hash(b.tobytes()))

def mat_hash(a, b):
    """Build a hashable dictionary key from two arrays.

    Both inputs are flattened and joined end to end (all of ``a`` first, then
    all of ``b``); the joined values are returned as a tuple.
    """
    flattened = np.concatenate((np.ravel(a), np.ravel(b)))
    return tuple(flattened)

def build_precompute_table(k, semiring_set):
    """Precompute the product of every pair of length-``k`` vectors.

    Every length-``k`` combination (with repetition) of ``semiring_set`` is
    paired with every such combination.  Each pair is turned into two
    ``(k, 1)`` column vectors ``a`` and ``b``; the ``k x k`` product
    ``a @ b.T`` is stored under the key ``mat_hash(a, b)``.

    Returns the dictionary mapping those keys to the products.
    """

    def as_readonly_column(values):
        # Shape the combination as a (k, 1) column and mark it read-only.
        column = np.asarray(values).reshape(k, 1)
        column.flags.writeable = False
        return column

    table = {}
    for a_values in product(semiring_set, repeat=k):
        column_a = as_readonly_column(a_values)
        for b_values in product(semiring_set, repeat=k):
            column_b = as_readonly_column(b_values)
            table[mat_hash(column_a, column_b)] = column_a @ column_b.T
    return table
    
    
def build_random_mat(size):
    """Return a ``size x size`` array whose entries are drawn from ``[0, 1]``.

    The values come from ``np.random.choice``, so they depend on NumPy's
    global random state.
    """
    cell_count = size * size
    flat_values = np.random.choice([0, 1], size=cell_count)
    return flat_values.reshape(size, size)


def semiring_kernel(a, b, c, precompute_table, n):
    """Accumulate a block product into ``c`` using table lookups only.

    For each ``i`` in ``range(n)``, column ``i`` of ``a`` and row ``i`` of
    ``b`` are reshaped to column vectors, the entry stored in
    ``precompute_table`` under ``mat_hash`` of that pair is fetched, and it is
    added onto ``c`` with ``+=``.  Nothing is returned.
    """
    for step in range(n):
        column_of_a = a[:, step].reshape(-1, 1)
        row_of_b = b[step, :].reshape(-1, 1)
        lookup_key = mat_hash(column_of_a, row_of_b)
        # Augmented assignment so the caller's array is the one updated.
        c += precompute_table[lookup_key]
        
def blocked_semiring(a, b, precompute_table, k_semi):
    """Compute ``a @ b`` tile by tile, using the precompute table where possible.

    The output is split into tiles of at most ``k_semi x k_semi``.  Tiles that
    are exactly ``k_semi x k_semi`` are accumulated by ``semiring_kernel``
    (table lookups); tiles cut short by the matrix edge are accumulated with
    an ordinary ``@`` product.

    Args:
        a: array indexed as ``(rows, inner)``.
        b: array indexed as ``(inner, cols)``.
        precompute_table: lookup table handed to ``semiring_kernel``; it must
            hold an entry for every pair of length-``k_semi`` vectors that
            occurs in the full-size tiles.
        k_semi: tile edge length (used as the ``range`` step).

    Returns:
        A new ``np.zeros``-initialised array of shape ``(rows, cols)`` holding
        the accumulated product.

    Raises:
        ValueError: if ``a.shape[1] != b.shape[0]``.
    """
    rows = a.shape[0]
    inner = a.shape[1]
    cols = b.shape[1]
    if inner != b.shape[0]:
        raise ValueError(
            "inner dimensions differ: a is {} but b is {}".format(a.shape, b.shape)
        )

    # allocate output
    c = np.zeros((rows, cols))

    # Tile over the OUTPUT dimensions (rows of a, columns of b); the inner
    # dimension is only the number of lookups per tile.
    for i in range(0, rows, k_semi):
        i_end = min(i + k_semi, rows)

        for j in range(0, cols, k_semi):
            j_end = min(j + k_semi, cols)

            a_rows = a[i:i_end]
            b_cols = b[:, j:j_end]
            c_tile = c[i:i_end, j:j_end]  # slice of c, so += below updates c

            # A tile that ends exactly on the matrix edge is still full-size.
            if i_end - i == k_semi and j_end - j == k_semi:
                semiring_kernel(a_rows, b_cols, c_tile, precompute_table, inner)
            else:
                # Partial tile: table keys assume length-k_semi vectors, so
                # fall back to a plain matrix product.
                c_tile += a_rows @ b_cols

    return c

def regular_kernel(a, b, c):
    """Add the ordinary matrix product ``a @ b`` onto ``c`` with ``+=``.

    Nothing is returned; for NumPy arrays the update is made to ``c`` itself.
    """
    block_product = a @ b
    c += block_product
    
    
    
def semiring_kernel_test(a, b, c, precompute_table, n):
    """Debug variant of the table-lookup kernel that checks every lookup.

    For each ``i`` in ``range(n)``, column ``i`` of ``a`` and row ``i`` of
    ``b`` are reshaped to column vectors and the table entry stored under
    ``mat_hash`` of the pair is compared with the directly computed product
    ``a_inner @ b_inner.T``.

    * Missing table entry: the two vectors and the key are printed and the
      step is skipped (nothing is added to ``c`` for it).
    * Entry present but different from the direct product: a diagnostic
      report is printed; the table entry is still added to ``c``.
    * Otherwise the table entry is added to ``c`` silently.

    Only ``KeyError`` (lookup) and ``AssertionError`` (comparison) are
    handled here; any other exception propagates to the caller.  Nothing is
    returned.
    """
    for i in range(n):
        a_inner = a[:, i].reshape(-1, 1)
        b_inner = b[i, :].reshape(-1, 1)
        key = mat_hash(a_inner, b_inner)

        # Look the entry up exactly once so a missing key is reported here
        # instead of raising again from inside a diagnostic handler.
        try:
            received = precompute_table[key]
        except KeyError:
            print(a_inner)
            print(b_inner)
            print(key)
            continue

        expected = a_inner @ b_inner.T
        try:
            np.testing.assert_array_equal(expected, received)
        except AssertionError:
            print("Broke on Batch {}".format(i))
            print("A: {}".format(a_inner.shape))
            print(a_inner)
            print("B")
            print(b_inner)
            print("Expected")
            print(expected)
            print("Received")
            print(received)

        # Accumulate what the table returned, matching the non-debug kernel.
        c += received
            
            
def blocked_semiring_debug(a, b, precompute_table, k_semi):
    """Debug variant of the blocked multiply that verifies table lookups.

    The output is split into tiles of at most ``k_semi x k_semi``.  Tiles that
    are exactly ``k_semi x k_semi`` (including ones that end on the matrix
    edge) go through ``semiring_kernel_test``, which checks each table entry
    it uses.  Tiles cut short by the edge are computed with an ordinary ``@``
    product, which is printed and then added to the output.

    Args:
        a: array indexed as ``(rows, inner)``.
        b: array indexed as ``(inner, cols)``.
        precompute_table: lookup table handed to ``semiring_kernel_test``.
        k_semi: tile edge length (used as the ``range`` step).

    Returns:
        A new ``np.zeros``-initialised array of shape ``(rows, cols)``.

    Raises:
        AssertionError: if ``a.shape[1] != b.shape[0]``.
    """
    assert a.shape[1] == b.shape[0]

    rows = a.shape[0]
    cols = b.shape[1]
    n = a.shape[1]  # inner dimension: number of lookups per full tile

    c = np.zeros((rows, cols))

    for i in range(0, rows, k_semi):
        i_end = min(i + k_semi, rows)

        for j in range(0, cols, k_semi):
            j_end = min(j + k_semi, cols)

            # A tile that ends exactly on the edge is still full-size and
            # must be checked against the table as well.
            if i_end - i == k_semi and j_end - j == k_semi:
                semiring_kernel_test(
                    a[i:i_end], b[:, j:j_end], c[i:i_end, j:j_end],
                    precompute_table, n,
                )
            else:
                # Partial tile: compute the product once, show it, add it.
                edge_product = a[i:i_end] @ b[:, j:j_end]
                print(edge_product)
                c[i:i_end, j:j_end] += edge_product

    return c



def test_precompute_table(a, b, table):
    """Print and verify a single precompute-table entry.

    Shows both operands, the directly computed ``a @ b.T`` and the entry
    stored in ``table`` under ``mat_hash(a, b)``, then asserts with
    ``np.testing.assert_array_equal`` that the two agree.
    """
    for label, operand in (("A", a), ("B", b)):
        print("{}: {}".format(label, operand.shape))
        print(operand)

    print("Expected")
    expected = a @ b.T
    print(expected)

    # The lookup happens only after the lines above have been printed.
    print("Received")
    received = table[mat_hash(a, b)]
    print(received)

    np.testing.assert_array_equal(expected, received)
    
def select_k(q, n):
    """Return ``ceil(0.5 + 0.5 * log_q(n) - log_q(log_q(n)))``.

    ``math.log`` raises ``ValueError`` when ``log_q(n)`` is not positive
    (for example ``n == 1``).
    """
    log_q_n = math.log(n, q)
    estimate = .5 + .5 * log_q_n - math.log(log_q_n, q)
    return math.ceil(estimate)
    
    
def test_semiring(size):
    """Check ``blocked_semiring`` against ``@`` on a random square matrix.

    Builds a ``size x size`` random matrix, picks ``k`` with ``select_k``,
    builds the precompute table, squares the matrix with ``blocked_semiring``
    and with NumPy's ``@``, prints timings, and asserts that both results are
    equal.
    """
    semiring_values = [1, 0]

    test_mat = build_random_mat(size)
    k = select_k(len(semiring_values), size)
    print("building table for size: {}".format(k))

    timer_start = time.perf_counter()
    table = build_precompute_table(k, semiring_values)
    timer_end = time.perf_counter()
    print("Table finished in {}".format(timer_end - timer_start))

    new_out = blocked_semiring(test_mat, test_mat, table, k)
    timer_end = time.perf_counter()
    # timer_start is not reset, so this figure covers the table build plus
    # the blocked multiply.
    print("{} semiring finished in {}".format(k, timer_end - timer_start))

    timer_start = time.perf_counter()
    correct_output = test_mat @ test_mat
    timer_end = time.perf_counter()
    print("numpy completed in {}".format(timer_end - timer_start))

    np.testing.assert_array_equal(correct_output, new_out)
        
        
    

In [4]:
# Run the end-to-end check and timing on a 480 x 480 random matrix.
test_semiring(480)

building table for size: 2
Table finished in 0.0003461549999883573
2 semiring finished in 258.53736199
numpy completed in 0.09600068299994291


In [None]:
# Vector length for the table built in this cell; also passed as the block
# size to blocked_semiring in a later cell.
k = 5

# Table of products for every pair of length-k vectors over the values 1 and 0.
table = build_precompute_table(k, [1,0])

In [None]:
# Number of entries stored in the precompute table.
print(len(table))


In [None]:
# Random 60 x 60 matrix of 0/1 entries used by the comparison cells below.
test_mat = build_random_mat(60)
print(test_mat)

In [None]:
# a_break_3 = np.asarray([[1, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 1], [0, 0, 0, 1, 1, 1]])
# print(a_break_3)

# b_break_3 = np.asarray([[1,0,1],[1,0,1],[0,0,0],[1,1,0],[0,1,1],[1,1,1]])
# print(b_break_3)

# a_break_3 @ b_break_3

In [None]:
# #verify these do break the code
# np.testing.assert_array_equal(test_mat[0:3],a_break_3)
# np.testing.assert_array_equal(test_mat[:,0:3],b_break_3)

In [None]:
# Square test_mat with the blocked, table-backed multiply.
out_semi = blocked_semiring(test_mat, test_mat, table, k)
#print(out_semi)

In [None]:
# Reference result: the same product computed directly with NumPy's @.
out_reg = test_mat @ test_mat
#print(out_reg)

In [None]:
# Raises AssertionError if the blocked result differs from the reference.
np.testing.assert_array_equal(out_semi,out_reg)