In [25]:
import numpy as np
import sklearn

In [32]:
data_path = '../datasets/'
data = np.nan_to_num(np.load(data_path + 'ratings_train.npy'))

class Solve:

    def __init__(self, k, mu, alpha, beta, train_data, descent_method = 'SGD', n_steps = 100):
        self.k = k
        self.mu = mu
        self.alpha = alpha
        self.beta = beta
        self.data = np.copy(train_data)
        self.non_nan = np.argwhere(~np.isnan(data))
        self.descent = descent_method
        self.I = np.random.rand(len(self.data), self.k) #Generating random matrices, maybe a better initialization can be initialized
        self.U = np.random.rand(len(self.data[0]), self.k).T

        self.n_steps = n_steps
    
    def compute_sgd(self):
        d_I, d_U = 0, 0
        for (i, j) in self.non_nan:
            eij = data[i][j] - np.dot(self.I[i,:],self.U[:,j])
            for k in range(self.k):
                d_I += self.I[i][k] + self.alpha * (2 * eij * self.U[k][j] - self.mu * self.I[i][k])
                d_U += self.U[k][j] + self.beta * (2 * eij * self.I[i][k] - self.mu * self.U[k][j])

        return d_I,d_U
    
    def train(self, output_loss=False):
        loss = []
        for _ in range(self.n_steps):
            if output_loss:
                e = 0
                for (i,j) in self.non_nan:
                    e = e + pow(self.data[i][j] - np.dot(self.I[i,:], self.U[:,j]), 2)
                    for k in range(self.k):
                        e = e + (self.mu/2) * (pow(self.I[i][k],2) + pow(self.U[k][j], 2))

                loss.append(e)

            for (i, j) in self.non_nan:
                eij = data[i][j] - np.dot(self.I[i,:], self.U[:,j])
                for k in range(self.k):
                    self.I[i, k] = self.I[i, k] + self.alpha * (2 * eij * self.U[k, j] - self.mu * self.I[i, k])
                    self.U[k, j] = self.U[k, j] + self.beta * (2 * eij * self.I[i, k] - self.mu * self.U[k, j])
                    
                    #self.I[i, k] = np.dot(eij, self.U[k, j]) * pow(np.dot(self.U[k, j].T, self.U[k, j]) + self.mu * np.eye(self.k), -1)
                    #self.U[k, j] = np.dot(eij.T, self.I[i, k]) * pow(np.dot(self.I[i, k].T, self.I[i, k]) + self.mu * np.eye(self.k), -1)
        
        return loss

    def rmse(self, test_matrix):
        # diffs = 0
        # predictions = self.predict()
        # T = len(np.argwhere(~np.isnan(test_matrix)))
        # for (i, j) in np.argwhere(~np.isnan(test_matrix)):
        #     diff = (test_matrix[i, j] - predictions[i, j])**2
        #     diffs += diff
        # return np.sqrt(diffs/T)
        masked = np.ma.array(test_matrix, mask=np.isnan(test_matrix))
        predictions = self.I@self.U
        diff = np.ma.subtract(predictions, masked)
        squared = np.ma.power(diff, 2)
        return np.ma.sqrt(np.ma.mean(squared))


    def predict(self):
        return self.I@self.U

if __name__ == '__main__':
    data_path = '../datasets/'
    data = np.load(data_path + 'ratings_train.npy')
    test_data = np.load(data_path + 'ratings_test.npy')

    solver = Solve(k=5, mu = 0.02, alpha = 0.0005, beta = 0.0005, train_data=data, n_steps=50)
    pred = solver.train()
    rmse = solver.rmse(test_data)
    train_rmse = solver.rmse(data)
    print(f"RMSE against TRAIN: {train_rmse}")
    print(f"RMSE against TEST: {rmse}")

ValueError: setting an array element with a sequence.

In [None]:
elif self.descent == 'ALS':
                self.I = self.data@self.U@(self.U.T@self.U + self.lam*np.eye(self.k))**(-1)
                self.U = self.data.T@self.I@(self.I.T@self.I + self.mu*np.eye(self.k))**(-1)

In [24]:
m = data.shape[0]
n = data.shape[1]
k = 5
mu = .1
lam = .1
step_I = .00007
step_U = .00007
I = np.ones((m, k))
U = np.ones((n, k))
V = 3 * np.random.rand(k,m)
W = 3 * np.random.rand(k,n)
R = np.nan_to_num(data, copy=True)

iters = 100

for i in range(iters):
  loss = np.linalg.norm((R - I@U.T), ord='fro')**2 + mu*np.linalg.norm(I, ord='fro')**2 + lam*np.linalg.norm(U, ord='fro')**2

  #print(f'Loss at iter {i+1}: {loss}')

  grad_U = -2*R.T@I + 2*U@I.T@I + 2*mu*U
  grad_I = -2*R@U + 2*I@U.T@U + 2*lam*I
  
  grad_I_als = R@U.dot(U.T@U + lam*np.eye(k))**(-1)
  grad_U_als = R.T@I.dot(I.T@I + mu*np.eye(k))**(-1)
  
  V = grad_I_als
  W = grad_U_als

  U -= step_U*grad_U
  I -= step_I*grad_I


rmse = np.sqrt(np.mean((I@U.T-R)**2))
print(rmse)

rmse_als = np.sqrt(np.mean((V@W.T-R)**2))
print(rmse_als)

0.3630064163361999
257.69673403705565


In [27]:
data_path = '../datasets/'
data = np.load(data_path + 'ratings_train.npy')
test_data = np.load(data_path + 'ratings_test.npy')

step_I = .00007
step_U = .00007

mu = .1
lam = .1

K = 5
I = np.random.rand(len(data),K) #Generating random matrices, maybe a better initialization can be initialized
U = np.random.rand(len(data[0]),K).T


non_nan = np.argwhere(~np.isnan(data))

for (i, j) in non_nan:
  eij = data[i][j] - np.dot(I[i,:],U[:,j])

  for k in range(K):
    I[i][k] = I[i][k] + step_I * (2 * eij * U[k][j] - mu * I[i][k])
    U[k][j] = U[k][j] + step_U * (2 * eij * I[i][k] - lam * U[k][j])