In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,mean_absolute_error

In [2]:
# Load the ratings table. Later cells look rows up by integer label
# (`ratings_matrix.loc[i]`) and treat the columns as item labels
# ("Movie:..." / "Book:...").
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# come from a trusted source. The path is relative to the working directory.
ratings_matrix = pd.read_pickle('ratings_matrix.pkl')
# Disabled second input; nothing in the visible cells uses it.
#ratings_with_name = pd.read_pickle('ratings_with_name.pkl')

In [3]:
# Pairwise Pearson correlation between the columns (items) of ratings_matrix.
# min_periods=30: a pair of items needs at least 30 rows in which both are
# rated; pairs with fewer observations get NaN instead of a correlation.
corr_matrix = ratings_matrix.corr(method='pearson', min_periods=30)

In [4]:
# One Series per row of ratings_matrix, holding only the items that row has a
# rating for (NaN entries dropped). Rows are fetched by integer label, so this
# relies on the frame's index being 0 .. len(ratings_matrix) - 1.
user_ratings = [
    ratings_matrix.loc[row_label].dropna()
    for row_label in range(len(ratings_matrix))
]

In [5]:
# Ratings given by the user at row label 0 (unrated items already dropped).
user_ratings[0]

Movie:Pretty Woman                         4.0
Movie:P.S I Love You                       4.0
Movie:Memento                              1.0
Movie:L.A Confidential                     3.0
Movie:Taken                                5.0
Movie:Forrest Gump                         3.0
Movie:Indiana Jones                        4.0
Movie:Avatar                               5.0
Movie:Gone Girl                            5.0
Movie:Da Vinci Code                        3.0
Book:Girl on the Train                     2.0
Book:Gone Girl].                           4.0
Book:Girl with the Dragon tattoo           4.0
Book:Kane and Abel                         3.0
Book:Goosebumps                            3.0
Book:Treasure Island                       2.0
Book:Moby Dick                             4.0
Book:Harry Potter : Philosopher’s Stone    4.0
Name: 0, dtype: float64

In [6]:
def generate_similar_items_list(corr_mat,user_rat):
    """
    Build a raw recommendation score per item for every user.

    For each item a user has rated, that item's column of ``corr_mat`` is
    taken (NaN correlations dropped) and multiplied by the user's rating for
    the item. The weighted columns of all the user's rated items are then
    summed per item label. Items the user has already rated are kept in the
    result.

    Parameters
    ----------
    corr_mat : pandas.DataFrame
        Item-by-item correlation matrix. It must have a column for every
        item label that appears in ``user_rat``.
    user_rat : sequence of pandas.Series
        One Series per user, indexed by item label, holding that user's
        ratings.

    Returns
    -------
    list of pandas.Series
        One Series per user, in the same order as ``user_rat``. Each is
        indexed by item label (sorted) and holds the summed weighted
        correlations. A user with no ratings gets an empty float Series.

    Raises
    ------
    KeyError
        If a rated item has no column in ``corr_mat``.
    """
    final_recommendations = []

    for ratings in user_rat:
        # Series.items() pairs every label with its own value, which avoids
        # integer-position lookups on a label-indexed Series.
        weighted_columns = [
            corr_mat[item].dropna() * rating
            for item, rating in ratings.items()
        ]

        if weighted_columns:
            # A single concat replaces the repeated Series.append calls
            # (Series.append no longer exists in pandas 2.x).
            stacked = pd.concat(weighted_columns)
            scores = stacked.groupby(stacked.index).sum()
        else:
            # pd.concat rejects an empty list; keep the "no ratings" case
            # as an empty result with an explicit dtype.
            scores = pd.Series(dtype=float)

        final_recommendations.append(scores)

    return final_recommendations

In [7]:
# Raw (unscaled) scores for every user: one Series per entry of user_ratings,
# in the same order.
recommendations = generate_similar_items_list(corr_mat=corr_matrix, user_rat= user_ratings)

In [8]:
# Raw scores for the user at list position 14, highest first. These are sums
# of rating-weighted correlations, not ratings: a higher score means the item
# is recommended more strongly to this user.
recommendations[14].sort_values(ascending=False)

Book:Da vinci Code                             55.540689
Movie:Sherlock(Series)                         54.427771
Movie:24(Series)                               54.164633
Book:The Shining                               54.039158
Movie:The Girl with the Dragon Tattoo          53.393771
Movie:Prison Break(Series)                     52.665907
Book:Around The World in 80 Days               52.628653
Book:Hounds Of Baskerville(Sherlock Holmes)    52.309965
Book:Hardy Boys                                52.268291
Movie:Taken                                    51.943786
Movie:Forrest Gump                             51.585384
Movie:Doctor Who(Series)                       50.070370
Book:Murder on the orient express              49.901155
Book:Famous Five                               49.842021
Movie:Gone Girl                                49.668400
Book:Gone Girl].                               49.179360
Movie:Avatar                                   48.207454
Book:Pride and Prejudice       

In [9]:
def scaled_ratings(recommendations, min_rating=1.0, max_rating=5.0):
    """
    Rescale each user's raw scores onto the rating scale and round them.

    Every Series is sorted in descending order, min-max normalised to 0-1
    using that user's own minimum and maximum, stretched onto
    ``[min_rating, max_rating]`` and rounded with ``np.rint``.

    Parameters
    ----------
    recommendations : sequence of pandas.Series
        One Series of raw scores per user, indexed by item label.
    min_rating, max_rating : float, optional
        Bounds of the target rating scale. The defaults reproduce the
        original 1-5 scale.

    Returns
    -------
    list of pandas.Series
        One Series per user, sorted by raw score (descending), holding
        integer-valued floats within ``[min_rating, max_rating]``.

    Notes
    -----
    * If all of a user's scores are equal (including a single score) the
      min-max formula would be 0/0 and yield NaN for every item. Such users
      get the midpoint of the scale for every item instead.
    * ``np.rint`` rounds exact halves to the nearest even value
      (2.5 -> 2.0, 3.5 -> 4.0).
    """
    scaled_recommendations = []
    for raw in recommendations:
        ordered = raw.sort_values(ascending=False)
        lowest = ordered.min()
        spread = ordered.max() - lowest

        if spread == 0:
            # No variation to normalise: avoid the 0/0 division and fall
            # back to the neutral midpoint of the scale.
            midpoint = (min_rating + max_rating) / 2.0
            scaled = pd.Series(midpoint, index=ordered.index, name=ordered.name, dtype=float)
        else:
            unit = (ordered - lowest) / spread  # normalise to 0-1
            scaled = unit * (max_rating - min_rating) + min_rating

        scaled_recommendations.append(np.rint(scaled))
    return scaled_recommendations

In [10]:
# Map every user's raw scores onto the 1-5 rating scale (rounded).
scaled_recommendations =  scaled_ratings(recommendations)

In [12]:
# Scaled scores for the user at list position 14: whole numbers between
# 1 and 5, stored as floats.
scaled_recommendations[14]

Book:Da vinci Code                             5.0
Movie:Sherlock(Series)                         5.0
Movie:24(Series)                               5.0
Book:The Shining                               5.0
Movie:The Girl with the Dragon Tattoo          5.0
Movie:Prison Break(Series)                     5.0
Book:Around The World in 80 Days               5.0
Book:Hounds Of Baskerville(Sherlock Holmes)    5.0
Book:Hardy Boys                                5.0
Movie:Taken                                    5.0
Movie:Forrest Gump                             5.0
Movie:Doctor Who(Series)                       5.0
Book:Murder on the orient express              5.0
Book:Famous Five                               5.0
Movie:Gone Girl                                5.0
Book:Gone Girl].                               5.0
Movie:Avatar                                   4.0
Book:Pride and Prejudice                       4.0
Movie:Pretty Woman                             4.0
Book:The Kite Runner           

In [13]:
def error_calculation(predicted_ratings,orig_ratings_mat):
    """
    Compute one mean absolute error per user between predicted and actual ratings.

    ``predicted_ratings[k]`` is compared with row label ``k`` of
    ``orig_ratings_mat``, so both must cover the same users in the same
    order. Only items that appear in the prediction AND have a non-NaN
    rating in the original row take part in the error.

    Returns a list of floats, one per entry of ``predicted_ratings``.
    """
    per_user_mae = []
    for user_pos in range(len(predicted_ratings)):
        predicted = predicted_ratings[user_pos]
        actual_row = orig_ratings_mat.loc[user_pos]

        # Items the user really rated that also received a prediction.
        rated_labels = actual_row.dropna().index
        shared_items = predicted.index.intersection(rated_labels)

        y_pred = np.array(predicted[shared_items])
        y_true = np.array(actual_row[shared_items])
        per_user_mae.append(mean_absolute_error(y_true, y_pred))
    return per_user_mae

In [14]:
# Per-user mean absolute error of the rounded 1-5 predictions against the
# ratings in ratings_matrix.
# NOTE(review): the predictions were derived from these same ratings, so this
# looks like an in-sample error rather than a held-out one -- confirm intent.
errors = error_calculation(scaled_recommendations,ratings_matrix)

In [15]:
# Convert the list of per-user errors to an ndarray (same name, new type).
errors = np.array(errors)

In [16]:
# Average of the per-user mean absolute errors across all users.
errors.mean()

0.8411153557831221

In [17]:
# Pick the user at list position 14 for a side-by-side look at predicted
# versus actual ratings.
predicted_for_user = scaled_recommendations[14]
actual_user_ratings = user_ratings[14]

In [18]:
# Item labels that have both a prediction and an actual rating for this user.
common = predicted_for_user.index.intersection(actual_user_ratings.index)

In [19]:
# Keep only the predictions for items the user actually rated.
predicted_for_user = predicted_for_user[common]

In [20]:
# Place predicted and actual ratings side by side (axis=1 gives one column
# per Series), aligned on item label.
compare_df  = pd.concat([predicted_for_user,actual_user_ratings],axis = 1,sort = True)

In [21]:
# Display-only relabelling: index labels are converted with str, column 0
# becomes "Predicted" and column 14 becomes "Actual". The result is not
# assigned, so compare_df itself keeps its original column labels.
# NOTE(review): 0 and 14 are presumably the labels concat gave the unnamed
# prediction Series and the Series for row label 14 -- confirm if the user
# index changes.
compare_df.rename(index = str , columns={0 : "Predicted" , 14 : "Actual"})

Unnamed: 0,Predicted,Actual
Book:1984,3.0,4.0
Book:Alice in the Wonderland,4.0,4.0
Book:Around The World in 80 Days,5.0,3.0
Book:Da vinci Code,5.0,5.0
Book:Death on the Nile,4.0,4.0
Book:Famous Five,5.0,4.0
Book:Goosebumps,3.0,2.0
Book:Hardy Boys,5.0,4.0
Book:Harry Potter : Half Blood Prince,4.0,5.0
Book:Harry Potter : Philosopher’s Stone,4.0,4.0
