# Movie Rating Prediction

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation

In [2]:
uim_header = ['user_id', 'item_id', 'rating']
u_item_df = pd.read_csv('ml-100k/u.data', sep='\t', names=uim_header, usecols=range(3))

In [64]:
u_item_df.head(10)

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
5,298,474,4
6,115,265,2
7,253,465,5
8,305,451,3
9,6,86,3


In [4]:
users = u_item_df.user_id.unique().shape[0]
items = u_item_df.item_id.unique().shape[0]

print("Number of users:", users)
print("Number of items:", items)

Number of users: 943
Number of items: 1682


In [67]:
ratings = np.zeros((users, items))

for row in u_item_df.itertuples():
    
    ratings[row[1]-1, row[2]-1] = row[3]
    
print("\nUser-Item matrix:\n")
print(ratings)


User-Item matrix:

[[5. 3. 4. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 5. 0. ... 0. 0. 0.]]


In [68]:
userSim_matrix = 1 - pairwise_distances(ratings, metric="cosine" )
np.fill_diagonal(userSim_matrix, 1)
print("\nUser-Similarity matrix:\n")
print(userSim_matrix.shape)
print(userSim_matrix[:4, :4])


User-Similarity matrix:

(943, 943)
[[1.         0.16693098 0.04745954 0.06435782]
 [0.16693098 1.         0.11059132 0.17812119]
 [0.04745954 0.11059132 1.         0.34415072]
 [0.06435782 0.17812119 0.34415072 1.        ]]


In [69]:
itemSim_matrix = 1 - pairwise_distances(ratings.T, metric="cosine" )
np.fill_diagonal(itemSim_matrix, 1 )
print("\nItem-Similarity matrix:\n")
print(itemSim_matrix.shape)
print(itemSim_matrix[:4, :4])


Item-Similarity matrix:

(1682, 1682)
[[1.         0.40238218 0.33024479 0.45493792]
 [0.40238218 1.         0.27306918 0.50257077]
 [0.33024479 0.27306918 1.         0.32486639]
 [0.45493792 0.50257077 0.32486639 1.        ]]


In [70]:
#Function to predict rating based on CF
'''Inputs:  User-item matrix
           Neighbourhood size
           User id
           Item id
           
   Outputs: u_pred, i_pred
'''

def predictRating(ratings, k, u, i):
    
    u_pred = 0
    i_pred = 0
    
    #User-based Collaborative Filtering
    
    mean_user_ratings = ratings.mean(axis=1)
    #print(mean_user_ratings[u])
    top_k_neigh_user = [np.argsort(userSim_matrix[:,u])[:-k-1:-1]]    
    #print(top_k_neigh)
        
    u_pred = userSim_matrix[u, :][top_k_neigh_user].dot(ratings[:, i][top_k_neigh_user])   
    u_pred /= np.sum(np.abs(userSim_matrix[u, :][top_k_neigh_user]))
    #print(u_pred)
    
    u_pred = mean_user_ratings[u] + u_pred
    #print(u_pred)
    
    #Item-based Collaborative Filtering
    
    mean_item_ratings = ratings.mean(axis=0)
    #print(mean_item_ratings[i])
    
    top_k_neigh_item = [np.argsort(itemSim_matrix[:,i])[:-k-1:-1]]
    #print(top_k_neigh_item)
    
    i_pred  = itemSim_matrix[i, :][top_k_neigh_item].dot(ratings[u, :][top_k_neigh_item].T) 
    i_pred /= np.sum(np.abs(itemSim_matrix[i, :][top_k_neigh_item]))
    #print(i_pred)
    
    i_pred = mean_item_ratings[i] + i_pred
    #print(i_pred)
    
    return u_pred, i_pred

In [71]:
print("\nExample: 1:  User-item matrix, Neighborhood size=10, User-id u=6, Item-id i=66")
u_pred, i_pred = predictRating(ratings, 10, 6, 66)
print("\nPredicted rating: User-based CF : ", u_pred)
print("Predicted rating: Item-based CF : ", i_pred)

print("\nExample: 2:  User-item matrix, Neighborhood size=15, User-id u=45, Item-id i=123")
u_pred, i_pred = predictRating(ratings, 15, 45, 123)
print("\nPredicted rating: User-based CF : ", u_pred)
print("Predicted rating: Item-based CF : ", i_pred)

print("\nExample: 3:  User-item matrix, Neighborhood size=25, User-id u=195, Item-id i=302")
u_pred, i_pred = predictRating(ratings, 25, 195, 302)
print("\nPredicted rating: User-based CF : ", u_pred)
print("Predicted rating: Item-based CF : ", i_pred)


Example: 1:  User-item matrix, Neighborhood size=10, User-id u=6, Item-id i=66

Predicted rating: User-based CF :  1.5156584813849512
Predicted rating: Item-based CF :  2.5067084558855424

Example: 2:  User-item matrix, Neighborhood size=15, User-id u=45, Item-id i=123

Predicted rating: User-based CF :  0.781233091111011
Predicted rating: Item-based CF :  1.623726662765576

Example: 3:  User-item matrix, Neighborhood size=25, User-id u=195, Item-id i=302

Predicted rating: User-based CF :  1.034796521296658
Predicted rating: Item-based CF :  1.2437771199224632
