In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse

%matplotlib inline

In [83]:
# 6x6 input matrix, built as a plain 2-D ndarray. np.matrix (and its string
# constructor) is no longer recommended by NumPy; a regular array indexes,
# prints and feeds np.linalg.svd the same way for everything done below.
ucl_a = np.array([
    [0.0, 1.0, 5.0, 0.0, 4.0, 5.0],
    [0.0, 0.0, 3.0, -1.0, 3.0, 5.0],
    [-2.0, -1.0, 0.0, 1.0, 1.0, 5.0],
    [-4.0, 0.0, -1.0, 0.0, 1.0, 4.0],
    [-5.0, -3.0, -1.0, -3.0, 0.0, 2.0],
    [-10.0, -6.0, -2.0, -3.0, -3.0, 0.0],
])
print(ucl_a)

# Full singular value decomposition: ucl_a == u @ diag(s) @ vt,
# with the singular values in s sorted in descending order.
u, s, vt = np.linalg.svd(ucl_a)

print(u, s, vt)

[[  0.   1.   5.   0.   4.   5.]
 [  0.   0.   3.  -1.   3.   5.]
 [ -2.  -1.   0.   1.   1.   5.]
 [ -4.   0.  -1.   0.   1.   4.]
 [ -5.  -3.  -1.  -3.   0.   2.]
 [-10.  -6.  -2.  -3.  -3.   0.]]
[[-0.08511855 -0.66108906  0.42033819 -0.42461485 -0.25685697  0.36436057]
 [-0.0016112  -0.54787869  0.23727552  0.35650192  0.25834022 -0.67059182]
 [ 0.164471   -0.37396178 -0.56787714 -0.18327623  0.64220232  0.25417206]
 [ 0.26238014 -0.29315107 -0.59413033  0.04388905 -0.66927545 -0.20584555]
 [ 0.44724183 -0.11123002  0.16549362  0.70590033 -0.03363178  0.51067419]
 [ 0.8347652   0.15694943  0.25328383 -0.39849331  0.07615963 -0.22312337]] [14.81225504 11.88878842  3.94778988  1.96487448  1.66261568  0.20914819] [[-0.80759639 -0.44557066 -0.18967934 -0.2484388  -0.16356404  0.15748517]
 [ 0.07630581 -0.07529226 -0.40867067  0.00309188 -0.45639246 -0.7830665 ]
 [ 0.03849476 -0.26039089  0.69294128 -0.52218712  0.11938841 -0.40449246]
 [ 0.32899186  0.01624544 -0.51218092 -0.74406749  

In [105]:
def FunkSVD(ratings_mat, latent_features=4, learning_rate=0.0001, iters=100, random_state=None):
    '''
    Factorize a matrix into two low-rank factors using a basic form of FunkSVD
    (per-entry gradient descent on the squared error, no regularization).

    Only observed entries take part in the fit: cells holding NaN are treated
    as missing and are skipped, so they neither contribute to the error nor
    corrupt the factors.

    INPUT:
    ratings_mat - (2-D array-like) matrix of values to factorize; NaN marks a
                  missing entry. np.matrix and nested lists are accepted.
    latent_features - (int) the number of latent features used
    learning_rate - (float) the learning rate
    iters - (int) the number of iterations (full passes over the matrix)
    random_state - (int or None) seed for the random initialization. When None
                   (the default) NumPy's global random state is used, so
                   np.random.seed() set by the caller is still honoured.

    OUTPUT:
    home_mat - (numpy array) a row by latent feature matrix
    away_mat - (numpy array) a latent feature by column matrix

    RAISES:
    ValueError - if ratings_mat is not two-dimensional
    '''

    # Normalize the input to a float ndarray so np.matrix / list inputs behave
    # like regular arrays and NaN can be represented.
    ratings = np.asarray(ratings_mat, dtype=float)
    if ratings.ndim != 2:
        raise ValueError("ratings_mat must be two-dimensional, got %d dimension(s)" % ratings.ndim)

    # Set up useful values to be used through the rest of the function
    n_users, n_movies = ratings.shape
    observed = ~np.isnan(ratings)
    num_ratings = np.count_nonzero(observed)

    # Initialize both factor matrices with uniform random values in [0, 1)
    if random_state is None:
        home_mat = np.random.rand(n_users, latent_features)
        away_mat = np.random.rand(latent_features, n_movies)
    else:
        rng = np.random.RandomState(random_state)
        home_mat = rng.rand(n_users, latent_features)
        away_mat = rng.rand(latent_features, n_movies)

    # Header for running results
    print("Optimizaiton Statistics")
    print("Iterations | Mean Squared Error ")

    for iteration in range(iters):

        # Sum of squared errors accumulated over this pass
        sse_accum = 0.0

        # For each row-column pair
        for i in range(n_users):
            for j in range(n_movies):

                # Skip missing entries: a NaN here would turn diff, and then
                # every factor value it touches, into NaN.
                if not observed[i, j]:
                    continue

                # Error = actual value minus the dot product of the latent features
                diff = ratings[i, j] - np.dot(home_mat[i, :], away_mat[:, j])

                # Keep track of the sum of squared errors for the matrix
                sse_accum += diff**2

                # Gradient step on both factors. The pre-update row is saved so
                # the away_mat update uses the same point the error was
                # evaluated at, rather than the freshly modified home_mat row.
                home_row = home_mat[i, :].copy()
                home_mat[i, :] += learning_rate * (2 * diff * away_mat[:, j])
                away_mat[:, j] += learning_rate * (2 * diff * home_row)

        # Print results for the first two and the last iteration only
        if iteration in {0, 1, iters - 1}:
            # Guard against a matrix with no observed entries
            mse = sse_accum / num_ratings if num_ratings else float('nan')
            print("%d \t\t %f" % (iteration + 1, mse))

    return home_mat, away_mat

In [106]:
# FunkSVD initializes its factors with np.random.rand, so seed NumPy's global
# generator first; otherwise the factors (and the printed error trace) change
# on every Restart & Run All.
np.random.seed(42)
home_mat, away_mat = FunkSVD(ucl_a, latent_features=6, learning_rate=0.005, iters=500)

Optimizaiton Statistics
Iterations | Mean Squared Error 
1 		 11.968661
2 		 10.127342
500 		 0.000003


In [107]:
# Show the two learned factor matrices, row-side factor first.
for factor in (home_mat, away_mat):
    print(factor)

# Reconstruction residual: factor product minus the original matrix.
# Values close to zero mean the factorization reproduces ucl_a.
residual = np.dot(home_mat, away_mat) - ucl_a
print(residual)


[[ 1.24167653  1.36301916  0.97504756  0.43058105  0.73869796  0.58119936]
 [ 0.38778038  0.40802045  1.59187865  0.53995948  0.79057684  0.52554284]
 [ 0.33788113 -0.60548512  0.28424658  0.31221046  1.18859717  1.50369274]
 [-0.41734952  0.39679191  0.03460311  0.10459401  0.09264511  1.98625794]
 [-0.11124066 -0.41752904  1.29275955 -0.73592158 -0.71975373  1.05744812]
 [ 1.1638132  -1.28639586  0.40129304 -0.51536231 -2.06253917  1.76668957]]
[[-0.64240702 -0.78903003  1.55823679  0.18574101  0.23741081  0.37206813]
 [ 0.66921604  1.49431514  1.3794533   0.0983849   1.43941298  0.57053239]
 [-0.34782524 -0.64268895  0.85649905 -1.38261544  0.88353947  1.35853986]
 [ 0.72933757  0.69445826  0.53051671  0.45699298 -0.02137006  0.37663482]
 [ 1.76526855  0.78253496  0.57608726  1.08792631  1.04181896  1.61676375]
 [-2.39722501 -0.52659658 -0.52163211 -0.03105426  0.20378925  1.85881372]]
[[ 1.19993071e-04  1.42776389e-03  9.87313111e-04 -1.01230914e-03
  -2.94190941e-03  1.08905853e-0