In [1]:
from hdmm.workload import Marginals, MarginalsGram, EkteloMatrix
from hdmm import templates
import numpy as np
from scipy.sparse.linalg import spsolve_triangular
from scipy import sparse
from scipy.optimize import check_grad, minimize

In [2]:
# Seed the global NumPy RNG so the random draws in this notebook are repeatable.
np.random.seed(0)
# 16 = 2**4 weights for the four-attribute domain below
# (presumably one weight per subset of attributes -- confirm against hdmm.workload.Marginals).
w = np.random.rand(16)
# The last weight is forced to zero.
w[-1] = 0
W = Marginals((2,3,4,5), w)
gram = W.gram()
# Number of attributes; hard-coded to match the length of the domain tuple (2,3,4,5).
d = 4
# Integer keys 0 .. 2**d - 1; used as a vectorized operand for the bitwise OR below.
A = np.arange(2**d)
# NOTE(review): _mult and _Xmatrix are underscore-prefixed members of the gram object,
# so this cell depends on hdmm internals rather than its public interface.
mult = gram._mult
weights = W.weights
# dphi[b] = sum over keys a of weights[a]**2 * mult[a | b].
dphi = np.array([np.dot(weights**2, mult[A|b]) for b in range(2**d)])
Xmatrix = gram._Xmatrix

def foo(theta2):
    """Evaluate the scalar objective probed by the convexity check below.

    ``theta2`` is mapped to ``params = Xmatrix(theta2)[0].dot(theta2)``; the system
    ``Xmatrix(params)[0] @ phi = theta2`` is then solved treating the matrix as
    upper triangular, and the inner product of ``phi`` with the module-level
    ``dphi`` vector is returned.

    Reads the module-level names ``Xmatrix`` and ``dphi``.
    """
    # Xmatrix returns a pair; only the first element is needed in both calls.
    outer, _ = Xmatrix(theta2)
    inner, _ = Xmatrix(outer.dot(theta2))
    solution = spsolve_triangular(inner, theta2, lower=False)
    return np.dot(solution, dphi)

# Empirical midpoint-convexity check: for random pairs (x, y), assert that
# foo at the midpoint does not exceed the average of foo(x) and foo(y).
# NOTE(review): the recorded output of this cell is
# "LinAlgError: A is singular: diagonal 15 is zero." -- presumably the zeroed
# last entry of x produces a zero on the diagonal of the triangular system
# solved inside foo; confirm against gram._Xmatrix before relying on this check.
for _ in range(1000):
    x = np.random.rand(16)
    # Last coordinate of x is forced to zero (y below is left untouched).
    x[-1] = 0
    #x /= x.sum()
    y = np.random.rand(16)
    #y /= y.sum()

    assert 0.5*(foo(x) + foo(y)) >= foo(0.5*x + 0.5*y)

LinAlgError: A is singular: diagonal 15 is zero.

In [3]:
def opt(W):
    """Numerically search for marginal weights and wrap them in a ``Marginals``.

    The objective minimised over ``v`` (one entry per key, constrained to be
    non-negative) is ``sum(v) * dot(phi, target)`` where ``phi`` solves the
    upper-triangular system ``X(v) @ phi = rhs`` built from ``W.gram()._Xmatrix``.
    The minimiser is rescaled to sum to one, and its element-wise square root is
    passed to ``Marginals`` as the weight vector.

    Parameters
    ----------
    W : workload object exposing ``gram()``, ``domain`` and ``shape``.

    Returns
    -------
    Marginals
        Built on ``W.domain`` with weights ``sqrt(res.x / res.x.sum())``.
    """
    gram = W.gram()
    build_X = gram._Xmatrix
    size = 2**len(W.domain)
    keys = np.arange(size)

    # Right-hand side of the triangular solve: a one in the last position only.
    rhs = np.zeros_like(gram.weights)
    rhs[-1] = 1
    target = build_X(gram.weights)[1].dot(np.ones(rhs.size)) * W.shape[1]

    def loss_and_grad(v):
        """Return ``(objective, gradient)`` at ``v`` in the form ``jac=True`` expects."""
        scale = np.sum(v)
        dscale = 1  # derivative of sum(v) with respect to every coordinate

        X, XT = build_X(v)
        phi = spsolve_triangular(X, rhs, lower=False)
        value = np.dot(phi, target)
        back = -spsolve_triangular(XT, target, lower=True)
        dvalue = np.array([
            np.dot(back[keys & b] * phi, gram._mult[keys | b])
            for b in range(size)
        ])

        # Product rule applied to scale * value.
        return scale*value, scale*dvalue + dscale*value

    result = minimize(
        loss_and_grad,
        x0=np.ones(size) / size,
        method='L-BFGS-B',
        jac=True,
        bounds=[(0, None)] * size,
    )
    normalized = result.x / result.x.sum()

    return Marginals(W.domain, np.sqrt(normalized))

# This draw is overwritten on the next line; it still advances the global NumPy RNG,
# so removing it would change the random values drawn by later cells.
weights = np.random.rand(4)
weights = np.array([1,1,1,3])
W = Marginals((2,2), weights)
# Strategy found by the numerical optimizer defined above.
A = opt(W)

# Squared weights of the workload and of the optimized strategy, for comparison.
print(weights**2)
print(A.weights**2)

# Eigenvalues of the dense Gram matrices of the workload (d) and of the strategy (e).
# Only d is printed; e is referenced solely by the commented-out lines below.
d = np.linalg.eig(W.gram().dense_matrix())[0]
e = np.linalg.eig(A.gram().dense_matrix())[0]
print(d)
#print(np.sqrt(d))
#print(e / e.min() * np.sqrt(d.min()))

# e is allocated from the _Xmatrix product and then every entry is overwritten by hand
# with explicit linear combinations of the squared weights.
e = W.gram()._Xmatrix(weights**2)[1].dot(np.ones(4))
w = weights**2
e[0] = 4*w[0] + 2*w[1] + 2*w[2] + w[3]
e[1] = 2*w[1] + w[3]
e[2] = 2*w[2] + w[3]
e[3] = w[3]
print(e)

# The same four linear combinations written as an upper-triangular matrix Y, so e = Y w.
Y = np.array([[4,2,2,1], [0,2,0,1], [0,0,2,1],[0,0,0,1]])
e = Y.dot(w)
# Solve Y w2 = sqrt(e) and print w2 normalized to sum to one. In the recorded output
# this agrees with A.weights**2 from opt() to roughly six decimal places.
w2 = np.linalg.inv(Y).dot(np.sqrt(e))
print(w2 / w2.sum())

[1 1 1 9]
[0.03560958 0.04603335 0.04603335 0.87232372]
[17. 11.  9. 11.]
[17. 11. 11.  9.]
[0.03560944 0.04603324 0.04603324 0.87232409]


In [4]:
def eigenopt(W):
    """Build a ``Marginals`` strategy in closed form from the workload's Gram weights.

    A matrix ``Y`` is assembled with ``Y[a, b] = G._mult[b]`` for every pair of keys
    where ``b`` contains all bits of ``a``. ``Y @ G.weights`` gives the (unique)
    eigenvalues; solving ``Y @ w = sqrt(eigenvalues)`` recovers the strategy
    weights, which are clipped at zero before their square root is handed to
    ``Marginals``.

    Parameters
    ----------
    W : workload object exposing ``gram()`` and ``domain``.

    Returns
    -------
    Marginals
        Built on ``W.domain``.
    """
    gram = W.gram()
    size = 2**len(gram.domain)

    # Entries exist only where b is a bitwise superset of a (hence b >= a),
    # which makes Y upper triangular.
    Y = sparse.dok_matrix((size, size))
    for a in range(size):
        for b in range(size):
            if (b & a) != a:
                continue
            Y[a, b] = gram._mult[b]
    Y = Y.tocsr()

    eigenvalues = Y.dot(gram.weights)
    solved = spsolve_triangular(Y, np.sqrt(eigenvalues), lower=False)
    # Negative entries are clipped so the square root below stays real.
    clipped = np.maximum(solved, 0)
    return Marginals(W.domain, np.sqrt(clipped))

# Four random weights for the two-attribute domain (2, 7).
weights = np.random.rand(4)
W = Marginals((2,7), weights)
# Run eigenopt once on this workload; the result D is not inspected in this cell.
D = eigenopt(W)

In [5]:
#weights = np.array([1,1,1,1])

# 16 random weights for the four-attribute domain (2,3,4,5).
weights = np.random.rand(16)
W = Marginals((2,3,4,5), weights)

# Dense strategy matrices from the numerical optimizer (C) and the closed form (D).
C = opt(W).dense_matrix()
D = eigenopt(W).dense_matrix()

# From here on W is the dense workload matrix, not the Marginals object.
W = W.dense_matrix()
WtW = W.T.dot(W)

u,s,v = np.linalg.svd(W)
w, z = np.linalg.eigh(WtW)

# Squared sum of singular values divided by the number of columns.
# Stored as svd_bound (not svdb) so that re-running this cell never rebinds the
# name of the svdb() function defined in a later cell.
svd_bound = s.sum()**2 / W.shape[1]

# Each err_k below is max(diag(S^T S)) * sum(pinv(S^T S) * WtW), where * inside
# the sum is the element-wise product, for a candidate strategy S.

# Strategy built from the SVD of W: A = diag(sqrt(s)) v.
A = np.diag(np.sqrt(s)).dot(v)
AtA = A.T.dot(A)
AtA1 = np.linalg.pinv(AtA)
err1 = np.max(np.diag(AtA)) * np.sum(AtA1 * WtW)

# Strategy built from the eigendecomposition of WtW: B = diag(w**0.25) z^T.
B = np.diag(w**0.25).dot(z.T)
BtB = B.T.dot(B)
BtB1 = np.linalg.pinv(BtB)
err2 = np.max(np.diag(BtB)) * np.sum(BtB1 * WtW)

# Strategy returned by opt().
CtC = C.T.dot(C)
CtC1 = np.linalg.pinv(CtC)
err3 = np.max(np.diag(CtC)) * np.sum(CtC1 * WtW)

# Strategy returned by eigenopt().
DtD = D.T.dot(D)
DtD1 = np.linalg.pinv(DtD)
err4 = np.max(np.diag(DtD)) * np.sum(DtD1 * WtW)

print(err1, err2, err3, err4, svd_bound)

199.42261685033685 199.4226168503375 199.57410544157003 199.8631765687516 199.4226168503366


In [6]:
from functools import reduce 

def svdb(W):
    """Compute ``dot(mult, sqrt(e))**2 / mult.sum()`` from the workload's Gram weights.

    ``e = Y @ G.weights`` are the unique eigenvalues, where ``Y[a, b] = G._mult[b]``
    whenever key ``b`` contains all bits of key ``a``. ``mult`` holds the
    multiplicity of each eigenvalue, formed as the Kronecker product of
    ``[1, n - 1]`` over the attribute sizes ``n`` in ``G.domain``.

    Parameters
    ----------
    W : workload object whose ``gram()`` exposes ``domain``, ``weights`` and ``_mult``.

    Returns
    -------
    float
        The value ``dot(mult, sqrt(e))**2 / mult.sum()``.
    """
    G = W.gram()
    d = len(G.domain)
    size = 2**d

    # create Y matrix: nonzero only where b is a bitwise superset of a
    Y = sparse.dok_matrix((size, size))
    for a in range(size):
        for b in range(size):
            if b & a == a:
                Y[a, b] = G._mult[b]
    Y = Y.tocsr()

    # compute unique eigenvalues
    e = Y.dot(G.weights)

    # now compute multiplicities
    # Seeding the reduction with np.ones(1) guarantees an ndarray result: without
    # an initial value a single-attribute domain returns the bare list [1, n-1]
    # (no .sum() method) and an empty domain raises TypeError.
    mult = reduce(np.kron, [[1, n - 1] for n in G.domain], np.ones(1))

    ans = np.dot(mult, np.sqrt(e))**2 / mult.sum()

    return ans

# `weights` is not defined in this cell: it is the 16-element array left over from
# the comparison cell above, so this cell must be run after that one.
W = Marginals((2,3,4,5), weights)

# The recorded value matches the dense SVD-based figure printed by the comparison cell.
print(svdb(W))

199.42261685033654


In [None]:
import benchmarks
from hdmm import error

# NOTE(review): this cell is marked "In [None]" and has no recorded output,
# i.e. it had not been executed when the notebook was saved.
# First element of whatever benchmarks.adult_big() returns is used as the workload.
W1 = benchmarks.adult_big()[0]

# svdb(W1) divided by the number of rows of W1, then square-rooted
# (presumably a per-query root-mean-squared-error bound comparable to
# error.rootmse below -- confirm).
print(np.sqrt(svdb(W1) / W1.shape[0]))

# svdb and eigenopt both call W1.gram() and read .domain/.weights/._mult from it;
# assumes W1 supports that interface -- TODO confirm for this benchmark.
A = eigenopt(W1)
print(error.rootmse(W1, A))