In [25]:
import numpy
import pandas

###############################################################################

"""
@INPUT:
    Observed_Matrix : the matrix to be factorized, dimension N x M
                      (only entries > 0 are treated as observed)
    Users_Matrix    : an initial matrix of dimension N x K
    Items_Matrix    : an initial matrix of dimension M x K
    Features_Number : the number of latent features (K)
    steps           : the maximum number of steps to perform the optimisation
    learning_rate   : the learning rate
    beta            : the regularization parameter
@OUTPUT:
    the final matrices Users_Matrix (N x K) and Items_Matrix (M x K)
"""

def Beta_Gradient_Descent_Matrix_Factorization(Observed_Matrix, Users_Matrix, Items_Matrix, Features_Number, steps=5000, learning_rate=0.0002, beta=0.02):
    """Factorize a ratings matrix with regularized stochastic gradient descent.

    Only entries of ``Observed_Matrix`` that are > 0 are treated as observed;
    every other entry is skipped both when updating the factors and when
    computing the stopping error.

    Args:
        Observed_Matrix: N x M array-like of ratings.
        Users_Matrix: initial N x K user factors. Not modified; a float copy
            is optimised instead.
        Items_Matrix: initial M x K item factors. Not modified; a float copy
            is optimised instead.
        Features_Number: number of leading latent features that are updated
            and regularized (normally K).
        steps: maximum number of passes over the observed entries.
        learning_rate: gradient step size.
        beta: regularization weight.

    Returns:
        Tuple ``(users, items)`` of float arrays with shapes N x K and M x K.
    """
    observed = numpy.asarray(Observed_Matrix)

    # Float copies: the caller's arrays stay untouched, and integer-typed
    # initial factors no longer truncate every update back to whole numbers.
    users = numpy.array(Users_Matrix, dtype=float)
    items = numpy.array(Items_Matrix, dtype=float).T  # K x M working layout

    feats = slice(0, Features_Number)

    # Observed cells in row-major order, i.e. the order the nested
    # row/column loops would visit them. Computed once, not on every step.
    observed_cells = numpy.argwhere(observed > 0).tolist()

    for step in range(steps):

        for row, col in observed_cells:
            local_error = observed[row, col] - numpy.dot(users[row, :], items[:, col])

            # Views into the working matrices, so "+=" writes through.
            user_feats = users[row, feats]
            item_feats = items[feats, col]

            # The user factors are updated first and the item update then
            # uses the already-updated user factors (sequential update).
            user_feats += learning_rate * (2 * local_error * item_feats - beta * user_feats)
            item_feats += learning_rate * (2 * local_error * user_feats - beta * item_feats)

        # Regularized squared error over the observed cells; used only as
        # the early-stopping criterion.
        total_error = 0.0
        for row, col in observed_cells:
            residual = observed[row, col] - numpy.dot(users[row, :], items[:, col])
            total_error += residual ** 2
            total_error += (beta / 2) * (numpy.sum(users[row, feats] ** 2) + numpy.sum(items[feats, col] ** 2))

        if total_error < 0.001:
            break

    return users, items.T

###############################################################################

if __name__ == "__main__":
    # Toy ratings matrix: one row per user, one column per item.
    # The factorizer only treats entries > 0 as observed ratings.
    Observed_Matrix = numpy.array([
         [5,3,0,1],
         [4,0,0,1],
         [1,1,0,5],
         [1,0,0,4],
         [0,1,5,4],
        ])

    N = len(Observed_Matrix)
    M = len(Observed_Matrix[0])
    Features_Number = 2

    # Random initial factors. The latent dimension is Features_Number so the
    # shapes agree with the value handed to the factorizer below.
    Users_Matrix = numpy.random.rand(N, Features_Number)
    Items_Matrix = numpy.random.rand(M, Features_Number)

    Predicted_Users_Matrix, Predicted_Items_Matrix = Beta_Gradient_Descent_Matrix_Factorization(Observed_Matrix, Users_Matrix, Items_Matrix, Features_Number)

    # Reconstructed N x M matrix, including estimates for the unobserved cells.
    print(numpy.dot(Predicted_Users_Matrix, Predicted_Items_Matrix.T))

[[ 5.00462761  2.90291256  5.01626632  0.99679008]
 [ 3.95474279  2.2994538   4.12494947  0.99603359]
 [ 1.09522611  0.7609651   4.76219447  4.96005035]
 [ 0.95001702  0.65133341  3.87597882  3.9726729 ]
 [ 2.08236019  1.30394882  4.88863685  4.03992074]]


In [22]:
class MoviesClassifier(object):
    """Rating predictor built on a low-rank factorization of the user x movie ratings.

    ``Train`` learns one factor row per user and per movie with (unregularized)
    stochastic gradient descent; a rating is predicted as the dot product of
    the two rows.
    """

    def __init__(self):
        # N x K user factors; None until Train has run.
        self.Users_Matrix = None
        # M x K movie factors; None until Train has run.
        self.Items_Matrix = None
        # userID -> row index into Users_Matrix.
        self.User_To_Index = dict()
        # movieID -> row index into Items_Matrix.
        self.Movie_To_Index = dict()
        # Mean of the training ratings; fallback prediction for unseen IDs.
        self.Mean_Rating = None

    def Train(self, file_name, Features_Number=2, steps=5000, learning_rate=0.0002):
        """Fit the factor matrices from a table of ratings.

        Args:
            file_name: either a pandas DataFrame or a path to a tab-separated
                file. The table must provide the columns ``userID``,
                ``movieID`` and ``rating``.
            Features_Number: number of latent features K.
            steps: maximum number of gradient-descent passes.
            learning_rate: gradient step size.

        Raises:
            ValueError: if the table contains no ratings.
        """
        if isinstance(file_name, pandas.DataFrame):
            ratings = file_name
        else:
            ratings = pandas.read_csv(file_name, sep='\t')

        if len(ratings) == 0:
            raise ValueError("training data contains no ratings")

        # Index users and movies in order of first appearance.
        self.User_To_Index = {user: index for index, user in enumerate(ratings["userID"].unique().tolist())}
        self.Movie_To_Index = {movie: index for index, movie in enumerate(ratings["movieID"].unique().tolist())}
        self.Mean_Rating = float(ratings["rating"].mean())

        N = len(self.User_To_Index)
        M = len(self.Movie_To_Index)

        # Dense N x M matrix; 0 marks "no rating" (see Is_Observed).
        # NOTE(review): memory grows with N * M; fine for small data sets.
        Observed_Matrix = numpy.zeros((N, M))
        rows = ratings["userID"].map(self.User_To_Index).to_numpy(dtype=int)
        cols = ratings["movieID"].map(self.Movie_To_Index).to_numpy(dtype=int)
        Observed_Matrix[rows, cols] = ratings["rating"].to_numpy()

        self.Users_Matrix = numpy.random.rand(N, Features_Number)
        self.Items_Matrix = numpy.random.rand(M, Features_Number)

        self.Gradient_Descent_Matrix_Factorization(Observed_Matrix, Features_Number, steps=steps, learning_rate=learning_rate)
        return

    def Predictions(self, users_movies_vector, output_path="output.csv"):
        """Predict a rating for every entry and write ``testID, prediction`` lines.

        Args:
            users_movies_vector: a pandas DataFrame with the columns
                ``testID``, ``userID`` and ``movieID``, or any iterable of
                objects exposing those three attributes.
            output_path: file the predictions are written to, one per line.

        Entries whose user or movie was not seen by ``Train`` receive the
        mean training rating.

        Raises:
            RuntimeError: if the classifier has not been trained.
        """
        if self.Users_Matrix is None or self.Items_Matrix is None:
            raise RuntimeError("Train must be called before Predictions")

        # Iterating a DataFrame directly yields column labels, not rows.
        if isinstance(users_movies_vector, pandas.DataFrame):
            entries = users_movies_vector.itertuples(index=False)
        else:
            entries = users_movies_vector

        predictions = list()
        for entry in entries:
            user_index = self.User_To_Index.get(entry.userID)
            movie_index = self.Movie_To_Index.get(entry.movieID)
            if user_index is None or movie_index is None:
                prediction = self.Mean_Rating
            else:
                user_row = self.Users_Matrix[user_index]
                movie_row = self.Items_Matrix[movie_index]
                prediction = self.Predict(user_row, movie_row.T)
            predictions.append(str(entry.testID) + ", " + str(prediction))

        with open(output_path, "w") as output_file:
            output_file.writelines(line + "\n" for line in predictions)

    def Predict(self, user_row, item_col):
        """Return the predicted rating: the dot product of the two factor vectors."""
        prediction = numpy.dot(user_row, item_col)
        return prediction

    @staticmethod
    def Error_Function(target, prediction):
        """Return the squared error between a target and a prediction."""
        return pow(target - prediction, 2)

    def Error_Partial_Derivative_Users(self, target, prediction, item):
        """Return the descent direction for a user factor: 2 * (target - prediction) * item."""
        return 2 * (target - prediction) * item

    def Error_Partial_Derivative_Items(self, target, prediction, user):
        """Return the descent direction for an item factor: 2 * (target - prediction) * user."""
        return 2 * (target - prediction) * user

    def Is_Observed(self, entry):
        """Return whether a ratings entry counts as observed (strictly positive)."""
        return entry > 0

    def Gradient_Descent_Matrix_Factorization(self, Observed_Matrix, Features_Number, steps=5000, learning_rate=0.0002, beta=0.02):
        """Optimise ``self.Users_Matrix`` / ``self.Items_Matrix`` in place.

        Args:
            Observed_Matrix: N x M array-like of ratings; only entries for
                which ``Is_Observed`` is true are used.
            Features_Number: number of leading latent features to update.
            steps: maximum number of passes over the observed entries.
            learning_rate: gradient step size.
            beta: accepted for signature compatibility; this unregularized
                variant does not apply it.

        ``self.Users_Matrix`` (N x K) and ``self.Items_Matrix`` (M x K) must be
        float arrays; they keep their shapes and are updated in place.
        """
        observed = numpy.asarray(Observed_Matrix)

        users = self.Users_Matrix
        # K x M view of the stored M x K matrix: writes go straight through,
        # so the stored matrix never needs to be transposed back.
        items = self.Items_Matrix.T

        # Observed cells in row-major order, computed once.
        observed_cells = numpy.argwhere(self.Is_Observed(observed)).tolist()

        for step in range(steps):

            total_error = 0.0

            for row, col in observed_cells:
                target = observed[row, col]
                prediction = self.Predict(users[row, :], items[:, col])

                # Views, so "+=" updates the stored matrices.
                user_feats = users[row, :Features_Number]
                item_feats = items[:Features_Number, col]

                # User factors first; the item update then uses the
                # already-updated user factors.
                user_feats += learning_rate * self.Error_Partial_Derivative_Users(target, prediction, item_feats)
                item_feats += learning_rate * self.Error_Partial_Derivative_Items(target, prediction, user_feats)

                # Error after the update, accumulated for early stopping.
                prediction = self.Predict(users[row, :], items[:, col])
                total_error = total_error + self.Error_Function(target, prediction)

            if total_error < 0.001:
                break

        return


In [36]:
# Create an untrained classifier: its factor matrices are None and its
# ID-to-index maps are empty until Train is called.
movies_classifier = MoviesClassifier()

In [46]:
# Load the tab-separated training ratings. The separator is passed by keyword
# because newer pandas releases no longer accept it positionally.
train_set = pandas.read_csv('user_ratedmovies_train.dat', sep='\t')
# Spot-check the ratings of a single user.
print(train_set[train_set.userID == 6785])

        Unnamed: 0  userID  movieID  rating  date_day  date_month  date_year  \
654          77860    6785      904     4.5        22           7       2006   
1851         77997    6785     3683     3.5         5           1       2006   
3843         77933    6785     1954     4.0        15           1       2006   
3935         77888    6785     1225     4.0         5           1       2006   
9820         78049    6785     6539     3.5         5           1       2006   
10101        78055    6785     6785     3.5         1           3       2008   
13844        78070    6785     7325     4.0         5           1       2006   
14029        77827    6785      101     5.0         6           1       2006   
15348        77824    6785       52     3.5         7           1       2006   
18679        77901    6785     1270     4.0         5           1       2006   
21409        78099    6785    44191     3.5        14           7       2006   
21924        77972    6785     2973     

In [None]:
# Fit the classifier on the training ratings loaded above.
# NOTE(review): Train's parameter is named file_name, but a DataFrame is
# passed here — confirm which of the two Train is meant to receive.
movies_classifier.Train(train_set)

In [30]:
# Load the tab-separated (testID, userID, movieID) triples to be predicted.
# The separator is passed by keyword because newer pandas releases no longer
# accept it positionally.
unknown_set = pandas.read_csv('predictions.dat', sep='\t')
print(unknown_set)

       testID  userID  movieID
0           0    6785     2599
1           1   10783      339
2           2   64642    58162
3           3    6971      296
4           4   48802     1387
5           5   61805     5029
6           6   26738     3729
7           7   63198    45672
8           8    5925      762
9           9   57835     1411
10         10   19066     3504
11         11   52136    45728
12         12   28645     2763
13         13   13472      253
14         14   24515    59784
15         15   70100    44665
16         16   28855     2788
17         17   45478     7149
18         18   35488     6942
19         19   12554     1587
20         20   24495     4232
21         21     267     2966
22         22   63375     8813
23         23   57512     1617
24         24   41154     1479
25         25   19587     5292
26         26    4549      515
27         27   41027     8665
28         28   35814     1569
29         29   18367     3755
...       ...     ...      ...
84970   

In [None]:
# Batch prediction goes through Predictions, which walks the
# (testID, userID, movieID) entries and writes output.csv. Predict scores a
# single user-row / item-column pair and cannot take the whole table.
movies_classifier.Predictions(unknown_set)