## Matrix Factorization Version



\begin{eqnarray}
p(Y |W, H ) &=& \prod_j \left(\left(\prod_{i : Y(i,j)=1} \sigma\left(\sum_k W(i,k) H(k,j)\right) \right) \left(\prod_{i : Y(i,j)=0}\left(1- \sigma\left(\sum_k W(i,k) H(k,j)\right)\right) \right) \right)
\end{eqnarray}

\begin{eqnarray}
\log p(Y |W, H ) &=& \sum_j \sum_{i : Y(i,j)=1} \log \sigma\left(\sum_k W(i,k) H(k,j)\right) + \sum_j \sum_{i : Y(i,j)=0} \log\left(1- \sigma(\sum_k W(i,k) H(k,j))\right) \\
\end{eqnarray}

\begin{eqnarray}
\log \sigma(x) & = & -\log(1+e^{-x}) = x - \log(1+e^{x}) \\
\log(1 - \sigma(x)) & = &  -\log({1+e^{x}})
\end{eqnarray}

Using the above properties of the sigmoid function, the log-likelihood simplifies to

\begin{eqnarray}
\log p(Y |W, H ) &=& \sum_j\sum_{i : Y(i,j)=1} \left( \left(\sum_k W(i,k) H(k,j)\right) - \log \left(1 + \exp(\sum_k W(i,k) H(k,j))\right) \right) - \sum_j \sum_{i : Y(i,j)=0} \log\left( 1+ \exp\left(\sum_k W(i,k) H(k,j) \right)\right) \\
&=& \sum_j \sum_{i} Y(i,j) \left(\sum_k W(i,k) H(k,j)\right)  - \sum_j \sum_{i} \log\left( 1+\exp\left(\sum_k W(i,k) H(k,j)\right)\right) 
\end{eqnarray}

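When some entries of $Y$ are missing, we encode them with a mask matrix $M$, where $M(i,j) = 1$ if $Y(i,j)$ is observed and $M(i,j) = 0$ otherwise. Only the observed entries contribute to the log-likelihood: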

\begin{eqnarray}
\log p(Y |W, H ) &=& \sum_j \sum_{i} M(i,j) Y(i,j) \left(\sum_k W(i,k) H(k,j)\right)  - \sum_j \sum_{i} M(i,j) \log\left( 1+ \exp\left(\sum_k W(i,k) H(k,j)\right)\right) 
\end{eqnarray}

\begin{eqnarray}
\frac{\partial}{\partial W(i,k)} \log p(Y |W, H ) &=& \sum_j M(i,j) Y(i,j) H(k,j)  - \sum_j M(i,j) \sigma\left(\sum_k W(i,k) H(k,j)\right) H(k, j) \\
&=& \sum_j M(i,j) \left(Y(i,j) - \sigma\left(\sum_k W(i,k) H(k,j)\right) \right) H(k, j)
\end{eqnarray}

\begin{eqnarray}
\frac{\partial}{\partial H(k,j)} \log p(Y |W, H ) &=& \sum_i M(i,j) Y(i,j) W(i,k)  - \sum_i M(i,j) \sigma\left(\sum_k W(i,k) H(k,j)\right) W(i, k) \\
&=& \sum_i M(i,j) \left(Y(i,j) - \sigma\left(\sum_k W(i,k) H(k,j)\right) \right) W(i, k)
\end{eqnarray}


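#### Evaluating the gradient (in matrix notation)

Here $\mathcal{L}(W,H) = \log p(Y | W, H)$, $\odot$ denotes the elementwise product and $\sigma$ is applied elementwise.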

$$
\frac{d\mathcal{L}(W,H)}{dW} = (M \odot (Y -\sigma(W H)))H^\top
$$

$$
\frac{d\mathcal{L}(W,H)}{dH} = W^\top (M \odot (Y -\sigma(W H)))
$$


In [None]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt


# Generate a random logistic regression problem

def sigmoid(t):
    """Return the logistic sigmoid 1 / (1 + exp(-t)), evaluated elementwise.

    Uses the identity sigmoid(t) = exp(-log(1 + exp(-t))).  ``np.logaddexp``
    evaluates log(exp(0) + exp(-t)) without forming exp(-t) explicitly, so
    large negative ``t`` no longer triggers a floating-point overflow; the
    result simply goes to 0 there.
    """
    return np.exp(-np.logaddexp(0, -t))

# Size of the toy problem: Y is M x N and the true factors have rank K
M = 3
N = 2
K = 1

# Some random parameters
W_true = np.random.randn(M,K)
H_true = np.random.randn(K,N)

# Generate class labels: Y(i,j) = 1 with probability pi(i,j) = sigmoid(W_true H_true)(i,j).
# The uniform draw must be *below* pi; comparing the other way round would
# produce labels that are 1 with probability 1 - pi.
pi = sigmoid(np.dot(W_true, H_true))
Y = np.array(np.random.rand(M,N)<pi,dtype=float)

# Observation mask: each entry is 1 (observed) with probability 1 - p_miss, else 0
#Mask = np.ones((M,N))
p_miss = 0.
Mask = np.array(p_miss<np.random.rand(M,N),dtype=float)

# Same mask with the unobserved entries replaced by NaN (used when plotting Y*Mask_nan)
Mask_nan = Mask.copy()
Mask_nan[Mask==0] = np.nan

In [None]:
def LogisticMF(Y, K, Mask, eta=0.005, nu=0.1, MAX_ITER = 5000, PRINT_PERIOD=500):
    """Fit a rank-K logistic matrix factorization by alternating gradient ascent.

    Models Y(i,j) as Bernoulli with success probability sigmoid((W H)(i,j)) and
    maximizes the masked log-likelihood minus an L2 penalty
    nu/2 * (||W||^2 + ||H||^2).

    Parameters
    ----------
    Y : 2-D array of 0/1 values.
    K : number of columns of W / rows of H.
    Mask : array of the same shape as Y; entries equal to 1 take part in the
        fit, entries equal to 0 are held out and only enter the reported
        test log-likelihood.
    eta : step size.
    nu : weight of the L2 penalty.
    MAX_ITER : number of iterations.
    PRINT_PERIOD : progress is printed every PRINT_PERIOD iterations.

    Returns
    -------
    (W, H) with shapes (Y.shape[0], K) and (K, Y.shape[1]).

    Every PRINT_PERIOD iterations one line is printed:
    ``epoch penalized_train_log_likelihood heldout_log_likelihood``.
    """
    M, N = Y.shape
    # Random initialization (W is drawn first, then H)
    W = np.random.randn(M,K)
    H = np.random.randn(K,N)

    for epoch in range(MAX_ITER):
        # Ascent step in H with W fixed: gradient is W^T (Mask * (Y - sigmoid(W H))) - nu*H
        dL = np.dot(W.T, Mask*(Y-sigmoid(np.dot(W,H)))) - nu*H
        H = H + eta*dL
        # Ascent step in W, using the H that was just updated
        dL = np.dot(Mask*(Y-sigmoid(np.dot(W,H))), H.T) - nu*W
        W = W + eta*dL

        if epoch % PRINT_PERIOD == 0:
            Z = np.dot(W,H)
            # Per-entry Bernoulli log-likelihood in the form
            #   y*z - log(1 + exp(z)).
            # This equals y*log(sigmoid(z)) + (1-y)*log(1-sigmoid(z)) but never
            # takes log(0), so it stays finite when the sigmoid saturates.
            loglik = Y*Z - np.logaddexp(0, Z)
            LL = np.sum(Mask*loglik) - nu*np.sum(H**2)/2. - nu*np.sum(W**2)/2.
            LL_test = np.sum((1-Mask)*loglik)
            # A single pre-formatted string prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s %s' % (epoch, LL, LL_test))

    return W,H

# Fit the toy problem generated above
W, H = LogisticMF(Y, K, Mask)

# Data: all entries, then only the entries kept by the mask (the rest become NaN)
plt.imshow(Y, interpolation='nearest', cmap=plt.cm.gray_r)  
plt.title('Y (Full data)')
plt.show()
plt.imshow(Y*Mask_nan, interpolation='nearest')  
plt.title('Mask*Y (Observed data)')
plt.show()

# Estimated probabilities.  Raw strings keep the backslash in the title
# without relying on the invalid escape sequence '\s'.
plt.imshow(sigmoid(np.dot(W,H)), interpolation='nearest')  
plt.title(r'\sigma(W*H)')
plt.show()

# Probabilities under the parameters that generated the data
plt.imshow(sigmoid(np.dot(W_true,H_true)), interpolation='nearest')  
plt.title(r'\sigma(W_true*H_true)')
plt.show()

# Estimated factors
plt.imshow(W, interpolation='nearest') 
plt.title('W')
plt.show()
plt.imshow(H, interpolation='nearest')  
plt.title('H')
plt.show()

In [None]:

def binary_random_mask_generator(M=30, N=150, p_miss=0.2):
    """Draw a random M x N observation mask.

    Each entry is compared independently against a uniform draw: it is kept
    (1.0) when the draw exceeds ``p_miss`` and dropped (0.0) otherwise.

    Returns
    -------
    Mask : float array of 0/1 values.
    Mask_nan : float array equal to ``Mask`` with every 0 replaced by NaN.
    """
    observed = np.random.rand(M, N) > p_miss
    Mask = observed.astype(float)
    Mask_nan = np.where(observed, 1.0, np.nan)
    return Mask, Mask_nan
    
def binary_random_matrix_generator1(M=30, N=150, p_on=0.3, p_switch=0.25):
    """Generate an M x N binary matrix whose columns are piecewise constant.

    A random 0/1 column (each entry on when a uniform draw is below ``p_on``)
    is copied into consecutive columns of the result.  Before each column is
    written, the current column is replaced by a freshly drawn one when a
    uniform draw is below ``p_switch``.
    """
    def draw_column():
        return (np.random.rand(M, 1) < p_on).astype(float)

    Y = np.zeros((M, N))
    current = draw_column()
    for col in range(N):
        switch = np.random.rand() < p_switch
        if switch:
            current = draw_column()
        Y[:, col] = current[:, 0]

    return Y

# Generate a catalog and reuse these
def binary_random_matrix_generator2(R=10, M=30, N=150, p_on=0.3, p_switch=0.25):
    """Generate an M x N binary matrix whose columns come from a small catalog.

    A catalog of R random 0/1 columns is drawn once (each entry on when a
    uniform draw is below ``p_on``).  One catalog column is active at a time
    and is copied into consecutive columns of the result.  Before each column
    is written, a new active column is picked uniformly at random when a
    uniform draw is below ``p_switch``.
    """
    Catalog = (np.random.rand(M, R) < p_on).astype(float)
    Y = np.zeros((M, N))

    active = np.random.choice(range(R))
    for col in range(N):
        if np.random.rand() < p_switch:
            active = np.random.choice(range(R))
        Y[:, col] = Catalog[:, active]

    return Y

# Generate a catalog and reuse pairwise
def binary_random_matrix_generator3(R=10, M=30, N=150, p_on=0.3, p_switch=0.25, sz=2):
    """Generate an M x N binary matrix from unions of catalog columns.

    A catalog of R random boolean columns is drawn once (each entry on when a
    uniform draw is below ``p_on``).  The active pattern is the elementwise OR
    of ``sz`` catalog columns picked uniformly with replacement; it is copied
    into consecutive columns of the result.  Before each column is written,
    a new pattern is drawn when a uniform draw is below ``p_switch``.

    Parameters
    ----------
    R : number of catalog columns.
    M, N : shape of the returned matrix.
    p_on : probability that a catalog entry is on.
    p_switch : probability of drawing a new pattern before a column.
    sz : number of catalog columns combined into one pattern (default 2).

    Returns
    -------
    Float array of shape (M, N) with 0/1 entries.
    """
    Catalog = np.random.rand(M, R) < p_on

    def draw_pattern():
        # OR together sz catalog columns chosen with replacement
        picks = np.random.choice(range(R), size=sz, replace=True)
        return Catalog[:, picks].any(axis=1)

    Y = np.zeros((M, N))
    pattern = draw_pattern()
    # The column index has its own name so that drawing a new pattern cannot
    # overwrite it; every pattern is written to the column it was drawn for.
    for col in range(N):
        if np.random.rand() < p_switch:
            pattern = draw_pattern()
        Y[:, col] = pattern
    return Y

# Size of the data matrix
M = 20
N = 100

# Rank
K = 4

# Synthetic data and a mask that drops each entry with probability p_miss
#Y = binary_random_matrix_generator1(M=M, N=N)
Y = binary_random_matrix_generator3(M=M, N=N, R=3)
Mask, Mask_nan = binary_random_mask_generator(M=M, N=N, p_miss=0.5)

W, H = LogisticMF(Y, K, Mask, eta=0.003, nu = 0.01, MAX_ITER=500000, PRINT_PERIOD=20000)

# Data: all entries, then only the entries kept by the mask (the rest become NaN)
figsz = (15,4)
plt.figure(figsize=figsz)
plt.imshow(Y, interpolation='nearest', cmap=plt.cm.gray_r)  
plt.title('Y (Full data)')
plt.show()
plt.figure(figsize=figsz)
plt.imshow(Y*Mask_nan, interpolation='nearest')  
plt.title('Mask*Y (Observed data)')
plt.show()

    
# Estimated probabilities.  A raw string keeps the backslash in the title
# without relying on the invalid escape sequence '\s'.
plt.figure(figsize=figsz)
plt.imshow(sigmoid(np.dot(W,H)), interpolation='nearest')  
plt.title(r'\sigma(W*H)')
plt.colorbar(orientation='horizontal')
plt.show()
# Estimated factors; W is drawn without an explicit figure size
#plt.figure(figsize=figsz)
plt.imshow(W, interpolation='nearest')  
plt.show()
plt.figure(figsize=figsz)
plt.imshow(H, interpolation='nearest')  
plt.show()
