In [1]:
import numpy as np
# Two small non-negative factors: a is 4x2 and b is 2x4.
a = np.array([
    [2, 3],
    [4, 2],
    [3, 3],
    [5, 1],
])
b = np.array([
    [2, 3, 4, 3],
    [4, 5, 1, 2],
])
# Their 4x4 product; the same values are re-entered by hand as `c` in the next cell.
np.dot(a, b)

array([[16, 21, 11, 12],
       [16, 22, 18, 16],
       [18, 24, 15, 15],
       [14, 20, 21, 17]])

In [2]:
# The full product matrix from the previous cell, kept as a plain list of lists.
c = [
    [16, 21, 11, 12],
    [16, 22, 18, 16],
    [18, 24, 15, 15],
    [14, 20, 21, 17],
]
# The same matrix with all but five entries replaced by 0.
cprime = [
    [0, 21, 0, 0],
    [16, 0, 0, 16],
    [0, 0, 15, 0],
    [14, 0, 0, 0],
]

In [3]:
from sklearn.decomposition import NMF
# Fit a 2-component scikit-learn NMF directly on cprime and display the
# per-row factor it returns. No mask is supplied, so the zeros in cprime are
# passed to the model as ordinary values.
model = NMF(n_components = 2)
model.fit_transform(cprime)

array([[ 0.        ,  4.58257569],
       [ 4.44455063,  0.        ],
       [ 0.        ,  0.        ],
       [ 2.30395908,  0.        ]])

In [4]:
# The fitted component matrix (second factor): one row per component.
model.components_

array([[ 4.12445831,  0.        ,  0.        ,  2.83744628],
       [ 0.        ,  4.58257569,  0.        ,  0.        ]])

In [5]:
# Keep both sklearn factors under short names: a2 is cprime transformed by the
# fitted model, b2 is the fitted component matrix.
a2 = model.transform(cprime)
b2 = model.components_

In [6]:
# Rebuild the matrix from the sklearn factors. The zeros in cprime were fitted
# as ordinary values, and in the output below the 15 is lost entirely while the
# 16/16/14 entries are only approximated.
a2.dot(b2)

array([[  0.        ,  21.        ,   0.        ,   0.        ],
       [ 18.33136378,   0.        ,   0.        ,  12.61117366],
       [  0.        ,   0.        ,   0.        ,   0.        ],
       [  9.50258317,   0.        ,   0.        ,   6.53736013]])

In [7]:
# The original input again, for comparison with the reconstruction a2.dot(b2).
cprime

[[0, 21, 0, 0], [16, 0, 0, 16], [0, 0, 15, 0], [14, 0, 0, 0]]

In [8]:
from scipy.sparse import csr_matrix
# Store cprime as a scipy CSR sparse matrix; this is the object later passed to nmf().
s = csr_matrix(cprime)

In [9]:
# Show the sparse matrix summary (shape, dtype, number of stored elements).
s

<4x4 sparse matrix of type '<type 'numpy.int64'>'
	with 5 stored elements in Compressed Sparse Row format>

In [10]:
# Densify again to confirm the sparse round-trip reproduces cprime.
s.toarray()

array([[ 0, 21,  0,  0],
       [16,  0,  0, 16],
       [ 0,  0, 15,  0],
       [14,  0,  0,  0]])

In [17]:
import numpy as np
from scipy import linalg
from numpy import dot

def nmf(X, latent_features, max_iter=100, error_limit=1e-6, fit_error_limit=1e-6,
        random_state=None):
    """
    Decompose X into A.dot(Y), fitting only the non-zero entries of X.

    Zeros in X are treated as missing: a mask built from np.sign(X) removes
    them from both the multiplicative updates and the reported residuals, so
    the product A.dot(Y) is free to take any value at those positions.
    X is expected to be non-negative.

    Parameters
    ----------
    X : scipy sparse matrix or 2-D array-like, shape (rows, columns)
        Data to factorise. Anything exposing ``toarray()`` is densified
        first; everything else is converted with ``np.asarray``.
    latent_features : int
        Number of columns of A / rows of Y.
    max_iter : int
        Maximum number of update iterations.
    error_limit : float
        Stop once the Frobenius norm of the masked residual
        ``mask * (X - A.dot(Y))`` drops below this value.
    fit_error_limit : float
        Stop once the masked change in ``A.dot(Y)`` since the previous
        evaluation drops below this value.
    random_state : int or None
        Seed for the random initial A. ``None`` (default) draws from the
        global ``np.random`` state, exactly as the function always did.

    Returns
    -------
    A : ndarray, shape (rows, latent_features)
    Y : ndarray, shape (latent_features, columns)
        Both factors are clamped to be >= 1e-5.

    Raises
    ------
    ValueError
        If X is not two-dimensional.
    """
    eps = 1e-5  # floor for the factors, also added to denominators to avoid 0/0
    print('Starting NMF decomposition with {} latent features and {} iterations.'.format(latent_features, max_iter))

    # Accept a scipy sparse matrix (the original use case) as well as dense input.
    if hasattr(X, 'toarray'):
        X = X.toarray()
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError('X must be a 2-D matrix, got {} dimension(s)'.format(X.ndim))

    # mask: 1 where X holds a value, 0 where it is missing
    mask = np.sign(X)

    # initial matrices. A is random [0,1] and Y is the least-squares solution A\X.
    rows, columns = X.shape
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    A = rng.rand(rows, latent_features)
    A = np.maximum(A, eps)

    Y = linalg.lstsq(A, X)[0]
    Y = np.maximum(Y, eps)

    masked_X = mask * X
    X_est_prev = dot(A, Y)
    for i in range(1, max_iter + 1):
        # ===== updates =====
        # Matlab (W is the mask): A=A.*(((W.*X)*Y')./((W.*(A*Y))*Y'));
        top = dot(masked_X, Y.T)
        bottom = dot(mask * dot(A, Y), Y.T) + eps
        A *= top / bottom
        A = np.maximum(A, eps)

        # Matlab (W is the mask): Y=Y.*((A'*(W.*X))./(A'*(W.*(A*Y))));
        top = dot(A.T, masked_X)
        bottom = dot(A.T, mask * dot(A, Y)) + eps
        Y *= top / bottom
        Y = np.maximum(Y, eps)

        # ==== evaluation ====
        # Only evaluate on the first, every fifth and the last iteration.
        if i % 5 == 0 or i == 1 or i == max_iter:
            X_est = dot(A, Y)
            # How much the masked estimate moved since the previous evaluation.
            err = mask * (X_est_prev - X_est)
            fit_residual = np.sqrt(np.sum(err ** 2))
            X_est_prev = X_est

            # Distance between the data and the estimate on the observed entries.
            curRes = linalg.norm(mask * (X - X_est), ord='fro')
            # One print call producing the same line on Python 2 and Python 3.
            print('Iteration {}: fit residual {} total residual {}'.format(
                i, np.round(fit_residual, 4), np.round(curRes, 4)))
            if curRes < error_limit or fit_residual < fit_error_limit:
                break

    return A, Y

In [18]:
# Factor the sparse matrix with two latent features. nmf() draws its initial A
# from np.random without a seed here, so the factors differ from run to run.
a4, b4 = nmf(s,2)

Starting NMF decomposition with 2 latent features and 100 iterations.
Iteration 1: fit residual 12.4158 total residual 1.6372
Iteration 5: fit residual 1.6342 total residual 0.0029
Iteration 10: fit residual 0.0029 total residual 0.0
Iteration 15: fit residual 0.0 total residual 0.0
Iteration 20: fit residual 0.0 total residual 0.0


In [13]:
# Reconstruct from the masked-NMF factors: the five non-zero positions of
# cprime are matched closely, the other positions are the model's fill-in values.
# NOTE(review): this cell's execution count (13) precedes the nmf() call (18),
# and the output shown does not equal the product of the a4/b4 printed further
# down, so it appears to come from an earlier run -- re-run top to bottom.
a4.dot(b4)

array([[  6.37069548,  20.9999852 ,   0.98464244,   9.31349884],
       [ 15.99999402,  31.78460024,  10.94484366,  15.99998943],
       [ 10.17396708,   0.32143107,  14.9999887 ,   3.15951508],
       [ 13.99999265,  14.70730474,  14.87418739,   9.37852226]])

In [19]:
# The full matrix before entries were zeroed out, for comparison with a4.dot(b4).
c

[[16, 21, 11, 12], [16, 22, 18, 16], [18, 24, 15, 15], [14, 20, 21, 17]]

In [33]:
# The zero-filled matrix that was given to nmf() (via the sparse matrix s).
cprime

[[0, 21, 0, 0], [16, 0, 0, 16], [0, 0, 15, 0], [14, 0, 0, 0]]

In [34]:
# First factor returned by nmf(): one row per row of cprime, one column per latent feature.
a4

array([[ 1.15112748,  0.03276278],
       [ 2.25874565,  0.97659106],
       [ 0.2039814 ,  1.02174821],
       [ 1.88320918,  1.29238263]])

In [35]:
# Second factor returned by nmf(); entries equal to 1e-05 sit at the eps floor
# that nmf() clamps its factors to.
b4

array([[  6.48666656e+00,   1.82429760e+01,   1.00000000e-05,
          7.08356739e+00],
       [  1.38058662e+00,   1.00000000e-05,   1.46807090e+01,
          1.90210817e-05]])