In [1]:
# setup
import warnings;
warnings.filterwarnings('ignore'); #tensorflow gives me weird stuff
import numpy as np;
import tensorflow as tf;
from numpy import matmul as mul
from numpy.linalg import norm as norm
from scipy import sparse
from tensorflow.sparse import to_dense
tf.enable_eager_execution()

In [2]:
def expand(factors, weights, dim_no, cur_idx, cur_prod, all_vals, rank):
    """Recursively enumerate every entry of a weighted Kruskal tensor.

    Walks the rows of ``factors[dim_no]``, multiplying the running per-rank
    product by the chosen row, and recurses into the next mode. Once every
    mode has contributed a row, the per-rank products are weighted by
    ``weights[0]`` and summed; non-zero results are appended to ``all_vals``
    as ``(index_list, value)`` tuples.

    Nothing is returned: the caller owns ``all_vals`` and reads the results
    from it afterwards. ``cur_prod`` itself is never modified (each branch
    works on a copy).
    """
    if dim_no != len(factors):
        # Still descending: branch once per row of the current factor matrix.
        for row_no, row in enumerate(factors[dim_no]):
            partial = cur_prod.copy()
            for r in range(rank):
                partial[r] = partial[r] * row[r]
            expand(factors, weights, dim_no + 1, cur_idx + [row_no], partial, all_vals, rank)
        return

    # Leaf: one row per mode has been picked, so cur_idx is a complete index.
    total = sum(cur_prod[r] * weights[0][r] for r in range(rank))
    if total != 0.0:
        all_vals.append((cur_idx, total))
            
def rebuild_sp_tensor_from_factors(kruskal_tensor, dimensions, rank):
    """Build a ``tf.SparseTensor`` from a ``(weights, factors)`` pair.

    ``kruskal_tensor[0]`` is used as the weights and ``kruskal_tensor[1]`` as
    the list of factor matrices. Every non-zero entry produced by ``expand``
    becomes one (index, value) pair of the returned tensor, whose dense shape
    is ``dimensions``.
    """
    weights = kruskal_tensor[0]
    factors = kruskal_tensor[1]

    # expand() reports its results by appending to this list.
    entries = []
    expand(factors, weights, 0, [], np.ones(rank), entries, rank)

    indices, values = [], []
    for index, value in entries:
        indices.append(index)
        values.append(value)

    return tf.SparseTensor(indices=indices, values=values, dense_shape=dimensions)

In [3]:
def generate_random_factors(dimensions, rank, d = 0.8):
    """Create one random dense factor matrix per tensor mode.

    Parameters
    ----------
    dimensions : sequence of int
        Size of each mode; one factor matrix is produced per entry.
    rank : int
        Number of columns of every factor matrix.
    d : float, optional
        Density passed to ``scipy.sparse.random`` (fraction of entries that
        are non-zero). Defaults to 0.8.

    Returns
    -------
    list of numpy.ndarray
        Dense arrays of shape ``(dim, rank)``, in the order of ``dimensions``.
    """
    # .toarray() is the supported way to densify; the `.A` shorthand is deprecated.
    factors = [sparse.random(dim, rank, density=d).toarray() for dim in dimensions]
    return factors

def expand_random_factors(factors, dim_no, cur_idx, cur_prod, all_vals, rank):
    """Recursively enumerate every entry of an unweighted factor product.

    For each mode a row of ``factors[dim_no]`` is chosen and multiplied into
    a copy of the running per-rank product. When all modes are consumed the
    per-rank products are summed and scaled by ``3.16 ** dim_no``; non-zero
    results are appended to ``all_vals`` as ``(index_list, value)`` tuples.

    Nothing is returned: results are delivered through ``all_vals``.
    """
    if dim_no != len(factors):
        # Still descending: branch once per row of the current factor matrix.
        for row_no, row in enumerate(factors[dim_no]):
            scaled = cur_prod.copy()
            for r in range(rank):
                scaled[r] = scaled[r] * row[r]
            expand_random_factors(factors, dim_no + 1, cur_idx + [row_no], scaled, all_vals, rank)
        return

    # 3.16 is roughly sqrt(10); per the original note this scaling is meant
    # to bring the generated values closer to the [0, 1] range.
    total = sum(cur_prod) * (3.16 ** dim_no)
    if total != 0.0:
        all_vals.append((cur_idx, total))
            
def generate_decomposable_sp_tensor(dimensions, rank):
    """Generate a sparse tensor that is a sum of ``rank`` outer products.

    Random factor matrices are drawn with ``generate_random_factors`` and
    expanded entry by entry with ``expand_random_factors``; the non-zero
    entries are packed into a ``tf.SparseTensor`` of dense shape
    ``dimensions``.
    """
    factors = generate_random_factors(dimensions, rank)

    # expand_random_factors() reports its results by appending to this list.
    entries = []
    expand_random_factors(factors, 0, [], np.ones(rank), entries, rank)

    indices, values = [], []
    for index, value in entries:
        indices.append(index)
        values.append(value)

    return tf.SparseTensor(indices=indices, values=values, dense_shape=dimensions)

def generate_random_sp_tensor(dimensions, d = 0.2):
    """Generate a sparse tensor with uniformly random non-zero positions.

    Parameters
    ----------
    dimensions : sequence of int
        Dense shape of the tensor.
    d : float, optional
        Target density (fraction of cells that are non-zero). Values above 1
        are treated as 1. Defaults to 0.2.

    Returns
    -------
    tf.SparseTensor
        Tensor with ``min(100000, int(prod(dimensions) * d))`` distinct
        indices in sorted order and values drawn from ``np.random.rand``.
    """
    nd = len(dimensions)
    dims = np.asarray(dimensions, dtype=np.int64)

    # Multiply in floating point: an integer product wraps around silently
    # for large shapes and would yield a meaningless item count.
    total_cells = float(np.prod(dims.astype(np.float64)))
    num_items = max(0, int(min(100000, total_cells * min(d, 1.0))))

    # Draws can collide, so keep topping up until the requested number of
    # distinct indices exists instead of settling for fewer entries.
    idxs = set()
    while len(idxs) < num_items:
        missing = num_items - len(idxs)
        draws = np.trunc(np.random.rand(missing, nd) * dims).astype(np.int64)
        draws = np.minimum(draws, dims - 1)  # guard against float round-up
        idxs.update(tuple(row) for row in draws.tolist())

    # Explicit (N, nd) shape keeps the index matrix 2-D even when N == 0.
    indices = np.array(sorted(idxs), dtype=np.int64).reshape(len(idxs), nd)
    values = np.random.rand(len(indices))
    st = tf.SparseTensor(indices=indices, values=values, dense_shape=dimensions)
    return st

In [4]:
def tensor_norm(st):
    """Return the square root of the sum of squares of ``st.values``.

    ``st`` must expose ``values.numpy()``; only the stored values are read.
    """
    total = 0
    for entry in st.values.numpy():
        total += entry ** 2
    return total ** 0.5

def difference_frobenius_norm(spt1, spt2):
    """Frobenius norm of the difference between two sparse tensors.

    Both arguments must expose ``indices.numpy()`` and ``values.numpy()``.
    An index present in only one tensor is treated as zero in the other.

    Returns the square root of the summed squared differences.
    """
    idx1 = [tuple(s) for s in spt1.indices.numpy()]
    idx2 = [tuple(s) for s in spt2.indices.numpy()]
    s1 = dict(zip(idx1, spt1.values.numpy()))
    s2 = dict(zip(idx2, spt2.values.numpy()))

    sum_sq = 0
    # Membership is tested against the dicts (constant time per lookup)
    # rather than the index lists, which would make this quadratic.
    for i in idx1:
        if i in s2:
            sum_sq += (s1[i] - s2[i]) ** 2
        else:
            sum_sq += s1[i] ** 2
    # Shared indices were already counted above; add only spt2's extras.
    for i in idx2:
        if i not in s1:
            sum_sq += s2[i] ** 2
    return sum_sq ** 0.5

def fit(spt1, spt2):
    """Return ``1 - ||spt1 - spt2|| / ||spt1||``.

    The numerator comes from ``difference_frobenius_norm`` and the
    denominator from ``tensor_norm`` of the first argument.
    """
    residual = difference_frobenius_norm(spt1, spt2)
    reference = tensor_norm(spt1)
    return 1 - (residual / reference)

def easy_fit(spt1,spt2):
    """Return ``1 - | ||spt1|| - ||spt2|| | / ||spt1||``.

    Only the two norms are compared; the positions of the entries play no
    part in the result.
    """
    reference = tensor_norm(spt1)
    gap = abs(reference - tensor_norm(spt2))
    return 1 - (gap / reference)

In [5]:
def mttkrp(X, factors, n, rank, dims):
    """Accumulate, per non-zero of ``X``, its value times the factor rows of all modes but ``n``.

    Parameters
    ----------
    X : sparse tensor
        Must expose ``indices.numpy()`` and ``values.numpy()``.
    factors : sequence
        One matrix per mode; ``factors[mode][i]`` must be a length-``rank``
        row (NumPy arrays or nested lists both work).
    n : int
        Mode that is left out of the product and that indexes the output rows.
    rank : int
        Number of columns of the output.
    dims : sequence of int
        Mode sizes; only ``dims[n]`` is read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(dims[n], rank)``.
    """
    output = np.zeros((dims[n], rank))
    indices = X.indices.numpy()
    values = X.values.numpy()

    for cur_index, value in zip(indices, values):
        # Start from an ndarray row holding the entry's value so the
        # element-wise products below never depend on list semantics.
        prod = np.full(rank, value)

        for mode, cv in enumerate(cur_index):
            if mode != n:
                prod = prod * np.asarray(factors[mode][cv])

        # Entries sharing the same mode-n index add up in the same output row.
        output[cur_index[n]] += prod

    return output

In [23]:
# CP Decomposition

def cp_als(X, rank, n_iter_max = 50, seed = None):
    """CP decomposition of a sparse tensor by alternating least squares.

    Parameters
    ----------
    X : sparse tensor
        Must expose ``shape.as_list()`` plus whatever ``mttkrp`` reads.
    rank : int
        Number of components.
    n_iter_max : int, optional
        Number of full sweeps over all modes. There is no convergence
        check, so exactly this many sweeps are run. Defaults to 50.
    seed : int or None, optional
        When given, the initial factors are drawn from a private
        ``numpy.random.RandomState(seed)`` so the run is repeatable. With the
        default ``None`` the global NumPy generator is used, as before.

    Returns
    -------
    (weights, factors)
        ``weights`` is a ``(1, rank)`` array; ``factors`` is a list with one
        ``(dim, rank)`` array per mode whose columns were divided by their
        norms (columns with norm below 1e-12 are left unscaled).
    """
    dims = X.shape.as_list()
    nd = len(dims)
    rng = np.random if seed is None else np.random.RandomState(seed)
    factors = [rng.random_sample((d, rank)) for d in dims]
    weights = np.ones((1,rank))

    for iteration in range(n_iter_max):
        print(iteration , end="\r")
        for n in range(nd):

            # Element-wise product of the weight outer product with the Gram
            # matrices of every factor except mode n, then its pseudo-inverse.
            h = mul(weights.T,weights)
            for i,f in enumerate(factors):
                if i != n:
                    h *= mul(f.T,f)
            vinv = np.linalg.pinv(h)

            # New mode-n factor: MTTKRP scaled by the weights, multiplied by
            # the pseudo-inverse computed above.
            mk = mttkrp(X, factors, n, rank, dims)
            wmk = np.multiply(mk, weights[0])
            An = mul(wmk,vinv)

            # Normalise the columns and fold their norms into the weights;
            # near-zero norms are replaced by 1 to avoid dividing by zero.
            weight = norm(An,axis=0)
            b = np.where(weight<1e-12, 1, weight)
            weights[0] *= b
            An /= b

            factors[n] = An

    return weights, factors

In [24]:
# Input for the decomposition below: a 3-mode sparse tensor generated from
# random factors of the given rank.
rank = 5
shape1 = [18, 27, 12]
st1 = generate_decomposable_sp_tensor(shape1, rank)
# Alternative input: st1 = generate_random_sp_tensor(shape1)
# Dense view for inspection: to_dense(st1)

In [25]:
# Disabled sweep: for each number of modes and each rank it would generate a
# random decomposable tensor, decompose it with cp_als, rebuild it and print
# the resulting fit.
# for dimensions in range(2,20):
#     for rank in range(1,20):
#         shp = list(np.random.randint(2,100,size=(dimensions)))
#         st = generate_decomposable_sp_tensor(shp, rank)
#         v = len(st.values.numpy())
#         decomp = cp_als(st, rank)
#         rebuilt = rebuild_sp_tensor_from_factors(decomp, shp, rank)
#         fit_val = fit(st,rebuilt)
#         print("fit: {},\trank: {},\tshape: {}\tnumber of nonzeros: {}".format(fit_val, rank, shp,v))

# Checklist of cases a test suite should cover. It is a bare string literal:
# it is neither assigned nor printed.
'''
A robust testing framework should test:
    tensors of varying number of dimensions
    tensors of various sizes
    tensors of varying rank
    tensors of varying sparsity
    tensors that are not perfectly decomposable
    running a variety of different ranks
    running a variety of different max number of iterations
'''
print()




In [26]:
# Decompose st1 at the chosen rank, then rebuild a sparse tensor from the
# returned (weights, factors) pair so it can be compared with st1.
cpd = cp_als(st1, rank)
rebuilt = rebuild_sp_tensor_from_factors(cpd, shape1, rank)
# print(difference_frobenius_norm(st1, rebuilt)/tensor_norm(st1))
# Last expression of the cell: displays the (weights, factors) pair.
cpd

0

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [432]:
# Norm-only comparison: 1 minus the relative gap between the two tensor norms.
easy_fit(st1,rebuilt)

0.9999965033467882

In [433]:
# Entry-wise comparison: 1 minus the relative Frobenius norm of the difference.
fit(st1,rebuilt)

0.9973555162699668