In [1]:
import numpy as np
from pandas import DataFrame

In [2]:
def random_initialization(A,rank):
    number_of_documents = A.shape[0]
    number_of_terms = A.shape[1]
    W = np.random.uniform(1,2,(number_of_documents,rank))
    H = np.random.uniform(1,2,(rank,number_of_terms))
    return W,H
                          

def nndsvd_initialization(A,rank):
    u,s,v=np.linalg.svd(A,full_matrices=False)
    v=v.T
    w=np.zeros((A.shape[0],rank))
    h=np.zeros((rank,A.shape[1]))

    w[:,0]=np.sqrt(s[0])*np.abs(u[:,0])
    h[0,:]=np.sqrt(s[0])*np.abs(v[:,0].T)

    for i in range(1,rank):
        
        ui=u[:,i]
        vi=v[:,i]
        ui_pos=(ui>=0)*ui
        ui_neg=(ui<0)*-ui
        vi_pos=(vi>=0)*vi
        vi_neg=(vi<0)*-vi
        
        ui_pos_norm=np.linalg.norm(ui_pos,2)
        ui_neg_norm=np.linalg.norm(ui_neg,2)
        vi_pos_norm=np.linalg.norm(vi_pos,2)
        vi_neg_norm=np.linalg.norm(vi_neg,2)
        
        norm_pos=ui_pos_norm*vi_pos_norm
        norm_neg=ui_neg_norm*vi_neg_norm
        
        if norm_pos>=norm_neg:
            w[:,i]=np.sqrt(s[i]*norm_pos)/ui_pos_norm*ui_pos
            h[i,:]=np.sqrt(s[i]*norm_pos)/vi_pos_norm*vi_pos.T
        else:
            w[:,i]=np.sqrt(s[i]*norm_neg)/ui_neg_norm*ui_neg
            h[i,:]=np.sqrt(s[i]*norm_neg)/vi_neg_norm*vi_neg.T

    return w,h
def mu_method(A,k,max_iter,init_mode='random'):
    
    if init_mode == 'random':
        W ,H = random_initialization(A,k)
    elif init_mode == 'nndsvd':
        W ,H = nndsvd_initialization(A,k) 
    norms = []
    e = 1.0e-10
    for n in range(max_iter):
        # Update H
        W_TA = W.T@A
        W_TWH = W.T@W@H+e
        for i in range(np.size(H, 0)):
            for j in range(np.size(H, 1)):
                H[i, j] = H[i, j] * W_TA[i, j] / W_TWH[i, j]
        # Update W
        AH_T = A@H.T
        WHH_T =  W@H@H.T+ e

        for i in range(np.size(W, 0)):
            for j in range(np.size(W, 1)):
                W[i, j] = W[i, j] * AH_T[i, j] / WHH_T[i, j]

        norm = np.linalg.norm(A - W@H, 'fro')
        norms.append(norm)
    return W ,H ,norms 

In [6]:
nrows, ncols = 10, 5
toy_vals = np.random.random(nrows*ncols).reshape((nrows, ncols))
X = DataFrame(toy_vals, index=range(nrows), columns=range(ncols))
X

Unnamed: 0,0,1,2,3,4
0,0.206271,0.914666,0.513929,0.250326,0.507801
1,0.216958,0.661296,0.65189,0.868281,0.834745
2,0.104916,0.296565,0.512472,0.359666,0.069648
3,0.352187,0.907169,0.502531,0.136289,0.91831
4,0.051422,0.332116,0.618715,0.438339,0.323589
5,0.903854,0.869996,0.94022,0.208855,0.586102
6,0.866846,0.742217,0.01006,0.133868,0.088735
7,0.696917,0.886286,0.945037,0.002067,0.113629
8,0.318706,0.528739,0.202921,0.334093,0.444856
9,0.007926,0.290274,0.333043,0.601165,0.951627


In [22]:
W,H,_ = mu_method(toy_vals, 3, 1000, init_mode='random')

In [23]:
np.linalg.norm(toy_vals - W@H, 'fro')

0.7284788173952871

In [20]:
A = np.array([1,2,3,4]).reshape((2,2))
B = np.array([1,2,3,4]).reshape((2,2))
A,B,A*B

(array([[1, 2],
        [3, 4]]),
 array([[1, 2],
        [3, 4]]),
 array([[ 1,  4],
        [ 9, 16]]))

In [24]:
DataFrame(W)

Unnamed: 0,0,1,2
0,1.831354,1.640358,1.4327
1,3.829242,0.6139047,1.234301
2,0.8214034,0.1693507,1.671454
3,2.569029,2.019725,0.9871743
4,1.672987,2.899625e-23,1.746612
5,1.289468,3.004301,2.789939
6,8.968468e-27,3.567802,9.689159000000001e-18
7,3.0433259999999997e-38,2.645299,3.365241
8,1.664719,1.40708,0.2281459
9,3.661144,1.071578e-71,3.093431e-08


In [25]:
DataFrame(H)

Unnamed: 0,0,1,2,3,4
0,1.353707e-41,0.111729,0.083456,0.1719296,0.24205
1,0.2268894,0.216706,0.005891,1.401181e-62,0.0560695
2,0.03044254,0.088564,0.276532,0.0160897,1.655401e-18


In [None]:
for n in range(max_iter):
    # Update H
    W_TA = W.T @ A
    W_TWH = W.T @ W @ H + e
    H = H * W_TA / W_TWH

    # Update W
    AH_T = A @ H.T
    WHH_T = W @ H @ H.T + e
    W = W * AH_T / WHH_T

In [33]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def ANMF(X, k, alpha=1, beta=1, max_iter=100):
    """
    Attention-based Non-negative Matrix Factorization (ANMF) using multiplicative update rule.
    
    Parameters:
        alpha: float, optional (default=1)
            The weight parameter for the attention mechanism.
        beta: float, optional (default=1)
            The regularization parameter for the attention weights.
    """
    X = X.T
    # Initialize W, H, and A
    n_samples, n_features = X.shape
    W = np.random.rand(n_samples, k)
    H = np.random.rand(k, n_features)
    A = np.random.rand(n_samples, n_features)
    
    # Update W, H, and A alternatively
    for i in range(max_iter):
        # Update H
        H = H * np.dot((W * np.dot(X * A, H.T)).T, A) / np.dot(np.dot(W.T, A), H.T)
        
        # Update W
        W = W * np.dot(X * A, H.T) / np.dot(np.dot(W, np.dot(H, H.T)), A.T)
        
        # Update A
        Z = np.dot(W, H)
        for j in range(n_samples):
            for l in range(n_features):
                A[j, l] = A[j, l] * ((X[j, l] * Z[j, l])**alpha) / \
                          (np.sum(np.dot(W[j, :].reshape(-1, 1), H[:, l].reshape(1, -1))**alpha) + beta)
    
    return W, H, A

In [28]:
def reconstruction_error(X, W, H, A):
    X_hat = np.dot(W, H) * A
    error = np.linalg.norm(X - X_hat, ord='fro')
    return error

In [34]:
# Generate a random non-negative matrix X
X = np.random.rand(100, 50)

# Apply ANMF with k=10, alpha=1, and beta=1
W, H, A = ANMF(X, k=10, alpha=1, beta=1)

# Calculate the reconstruction error
error = reconstruction_error(X, W, H, A)

# Print the reconstruction error
print("Reconstruction error:", error)

ValueError: operands could not be broadcast together with shapes (10,100) (10,10) 