# MovieLens Recommendation System

In [14]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pylab as plt

#set column names
rnames = ['user_id', 'movie_id', 'rating', 'timestamp'] 

#get data and display first 3 of them
rating = pd.read_table('data/ml-1m/ratings.dat', sep='::', header=None, names=rnames, engine='python')
rating[0:3]

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968


In [15]:
ratings = np.mat(rating) #Ys

ratingCount=rating.shape[0]  #Ysize
userCount=np.max(ratings[:,0]) +1 #Nr
movieCount=np.max(ratings[:,1]) +1 #Nc

print(' User Count: ', userCount, '\n Movie Count: ', movieCount, '\n Rating Count: ', ratingCount)

(' User Count: ', 6041, '\n Movie Count: ', 3953, '\n Rating Count: ', 1000209)


In [16]:
print(ratings)
#Shuffle Ratings
np.random.shuffle(ratings)
print(ratings)

[[        1      1193         5 978300760]
 [        1       661         3 978302109]
 [        1       914         3 978301968]
 ..., 
 [     6040       562         5 956704746]
 [     6040      1096         4 956715648]
 [     6040      1097         4 956715569]]
[[     1939       112         3 974691508]
 [     1223       940         4 974837799]
 [     1647      1089         5 986541527]
 ..., 
 [     5960      1414         3 957028932]
 [      752      3785         3 991951512]
 [     4064      3507         4 965476888]]


In [17]:
A = np.mat(np.random.rand(userCount, 1))
B = np.mat(np.random.rand(1, movieCount))

M = np.zeros((userCount, movieCount))
Y = np.zeros((userCount, movieCount))
print(M)


[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [20]:
for i in range(ratingCount):
    
    # Create the Mask matrix: M[user, movie]
    M[ratings[i,0], ratings[i,1]] = 1
    
    # Create the Y matrix with the original values from the ratings table
    Y[ratings[i,0] , ratings[i,1]] = ratings[i, 2]
    
print(Y)
print(M)

[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  5.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  3.  0. ...,  0.  0.  0.]]
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  1.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  1.  0. ...,  0.  0.  0.]]


In [21]:
# Create Eta, EPOCH 
EPOCH = 5
Eta = 0.1
eta = Eta

errors=[]
iterationCount= list(range(EPOCH))

for i in range(EPOCH):
    # average error
    E = np.multiply(M, np.subtract(Y, A*B))
    Err = np.sum(np.square(E)) / np.sum(M)
    
     # add to array
    errors.append(Err)
    
    for k in range(10000):
        u = ratings[k,0]
        m = ratings[k,1]
        
        # find error for each point
        err = ratings[k,2] - (A[u,:] * B[:,m])

        
       
        temp_A = A[u,:] + (eta * err[0,0] * B[:,m].T)
        B[:,m]   = B[:,m] + (eta * err[0,0] * A[u,:].T)
        A[u,:]   = temp_A
    
    eta = Eta*1./(i+1)
    
print(errors)  

[12.414920299422086, 4.7446584047273577, 3.3621610616728135, 3.0906106789937451, 3.0003532277521048]


In [22]:
compoundData = A * B
compoundData

matrix([[ 0.17247998,  2.05433885,  1.5430905 , ...,  0.68720022,
          0.68336547,  1.51379634],
        [ 0.10538988,  1.25525588,  0.94286949, ...,  0.41989768,
          0.41755455,  0.92496998],
        [ 0.13464153,  1.60366037,  1.20456909, ...,  0.53644303,
          0.53344954,  1.18170145],
        ..., 
        [ 0.17912107,  2.13343812,  1.60250491, ...,  0.71365985,
          0.70967744,  1.57208283],
        [ 0.25360677,  3.02060689,  2.26889045, ...,  1.01042811,
          1.00478967,  2.22581765],
        [ 0.27694918,  3.29862883,  2.47772309, ...,  1.10342968,
          1.09727226,  2.4306858 ]])

In [24]:
# Print data 10 movies of first 20 user
for j in range(20):
    
    userData = [(0,0)]
    
    for m in range (movieCount):
        movieData = (compoundData[j, m], m)
        userData.append(movieData)
        
    userData = sorted(userData, key=lambda tup: tup[0], reverse=True)     
    
    print("\nMovies for User =" , j+1 , " as follows: \n")
    
    for n in range(10):
        print (n+1, "\t", userData[n])

('\nMovies for User =', 1, ' as follows: \n')
(1, '\t', (2.2924104668113952, 608))
(2, '\t', (2.2834451922812651, 720))
(3, '\t', (2.2716302185839288, 1193))
(4, '\t', (2.2705060932482168, 908))
(5, '\t', (2.2704121301288653, 903))
(6, '\t', (2.2683083631357812, 2871))
(7, '\t', (2.2561395010255336, 1247))
(8, '\t', (2.2387886808784794, 3307))
(9, '\t', (2.2317177605089937, 322))
(10, '\t', (2.2291604393558173, 1206))
('\nMovies for User =', 2, ' as follows: \n')
(1, '\t', (1.4007239981995847, 608))
(2, '\t', (1.3952459761059812, 720))
(3, '\t', (1.3880267117396936, 1193))
(4, '\t', (1.3873398411475759, 908))
(5, '\t', (1.3872824271730164, 903))
(6, '\t', (1.3859969693736842, 2871))
(7, '\t', (1.3785614697389639, 1247))
(8, '\t', (1.3679596553953797, 3307))
(9, '\t', (1.3636391342695651, 322))
(10, '\t', (1.3620765427693875, 1206))
('\nMovies for User =', 3, ' as follows: \n')
(1, '\t', (1.7895041113907877, 608))
(2, '\t', (1.7825056284124186, 720))
(3, '\t', (1.773282609972459, 1193))