In [1]:
# importing necessary packages
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from math import sqrt

In [2]:
# Mount Google Drive at /content/gdrive so the files read below are reachable
# under '/content/gdrive/My Drive/'.
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [17]:
# Pick the algorithm to evaluate; its predictions live in '<alg>.csv' on Drive.
# Other values this cell has been run with:
#   "SVD", "NMF", 'CoClustering', 'NeuralNet_Prediction', 'Ensemble'
alg = 'KNNBaseline'

# Read that algorithm's predicted and true ratings into a dataframe and preview it
predictions_csv_path = '/content/gdrive/My Drive/' + alg + '.csv'
predicted_dataframe = pd.read_csv(predictions_csv_path)
predicted_dataframe.head()

Unnamed: 0,userId,movieId,predicted_rating,true_rating
0,1,1193,4.600525,5
1,1,3408,4.184837,4
2,1,919,4.66706,4
3,1,2797,4.214397,4
4,1,720,4.659941,3


In [18]:
# Load the movies file into a dataframe, keeping only the first two
# '::'-separated fields and naming them movieId and title.
# engine='python' is requested explicitly: the default C parser does not support
# a multi-character separator such as '::', so without it pandas emits a
# ParserWarning and falls back to the Python engine anyway.
movies_dataframe = pd.read_csv(
    '/content/gdrive/My Drive/movies.dat',
    sep='::',
    usecols=[0, 1],
    names=['movieId', 'title'],
    engine='python',
)
# Lookup table used later to turn a movieId into its title
dict_movies = movies_dataframe.set_index('movieId')['title'].to_dict()

  


In [19]:
# collect every distinct userId from the dataframe, in order of first appearance
users = predicted_dataframe['userId'].drop_duplicates().tolist()

In [20]:
# Build, for each userId, the list of (movieId, predicted_rating) pairs ordered
# from the highest to the lowest predicted rating.
# .copy() gives an independent frame, so adding a column below does not trigger
# pandas' SettingWithCopyWarning on a column-subset of predicted_dataframe.
dataframe_1 = predicted_dataframe[['userId', 'movieId', 'predicted_rating']].copy()
# Pair the two columns directly instead of a row-wise apply(axis=1): no Python
# call per row, and movieId is not upcast to float alongside the float ratings.
dataframe_1['predicted_movies'] = list(zip(dataframe_1['movieId'], dataframe_1['predicted_rating']))
dataframe_2 = dataframe_1[['userId', 'predicted_movies']]
dataframe_sort = dataframe_2.groupby('userId')['predicted_movies'].apply(list).reset_index(name='recommendation')
# sorting: sorted() is stable, so pairs with equal ratings keep their original order
dataframe_sort['recommendation'] = dataframe_sort['recommendation'].apply(
    lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True)
)
# userId -> sorted list of (movieId, predicted_rating)
recommendation_userid_sort = pd.Series(dataframe_sort['recommendation'].values,index=dataframe_sort['userId']).to_dict()

In [21]:
# Build, for each userId, the list of (movieId, true_rating) pairs ordered from
# the highest to the lowest true rating.
# .copy() gives an independent frame, so adding a column below does not trigger
# pandas' SettingWithCopyWarning on a column-subset of predicted_dataframe.
true_dataframe = predicted_dataframe[['userId', 'movieId', 'true_rating']].copy()
# Pair the two columns directly instead of a row-wise apply(axis=1), which calls
# a Python function once per row.
true_dataframe['movies'] = list(zip(true_dataframe['movieId'], true_dataframe['true_rating']))
true_dataframe_1 = true_dataframe[['userId', 'movies']]
true_dataframe_sort = true_dataframe_1.groupby('userId')['movies'].apply(list).reset_index(name='recommendation')
# sorting: sorted() is stable, so movies with the same rating keep their original
# order -- which of them land in a top-n slice therefore depends on row order.
true_dataframe_sort['recommendation'] = true_dataframe_sort['recommendation'].apply(
    lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True)
)
# userId -> sorted list of (movieId, true_rating)
true_rating_userid_sort = pd.Series(true_dataframe_sort['recommendation'].values,index=true_dataframe_sort['userId']).to_dict()

In [22]:
# function to return the titles of the top n recommendations (n = 10 by default)
def n_top_recommendation(user, n=10):
  """Return the titles of the first ``n`` entries of ``user``'s sorted recommendations.

  Reads ``recommendation_userid_sort`` for the user's list and ``dict_movies``
  to translate each entry's movie id (cast to int) into a title.
  """
  titles = []
  for entry in recommendation_userid_sort[user][:n]:
    titles.append(dict_movies[int(entry[0])])
  return titles

# function to return the titles of the top n true ratings (n = 10 by default)
def n_top_true_ratings(user, n=10):
  """Return the titles of the first ``n`` entries of ``user``'s sorted true ratings.

  Reads ``true_rating_userid_sort`` for the user's list and ``dict_movies``
  to translate each entry's movie id (cast to int) into a title.
  """
  titles = []
  for entry in true_rating_userid_sort[user][:n]:
    titles.append(dict_movies[int(entry[0])])
  return titles

In [23]:
# Per-user precision and recall of the top-n recommended titles measured against
# the top-n truly rated titles (n = 10, the default of both helper functions).
# NOTE: both title lists are cut from the same user's rows, so they have the same
# length; unless a title repeats within a list, false positives equal false
# negatives and precision equals recall for every user.
n_top_recommendation_user = {}
out_u = []
for u in users:
  # remember the recommended titles so they can be inspected per user afterwards
  recommended_titles = n_top_recommendation(u)
  n_top_recommendation_user[u] = recommended_titles
  relevant_titles = n_top_true_ratings(u)

  recommended_set = set(recommended_titles)
  relevant_set = set(relevant_titles)
  # calculating true positives, false positives, false negatives
  true_positives = len(recommended_set & relevant_set)
  false_positives = len(recommended_set - relevant_set)
  false_negatives = len(relevant_set - recommended_set)

  precision_for_user = true_positives / float(true_positives + false_positives)
  recall_for_user = true_positives / float(true_positives + false_negatives)

  # one [userId, precision, recall] row per user
  out_u.append([u, precision_for_user, recall_for_user])

# final dataframe with precision and recall values of each user
dataframe_final = pd.DataFrame(out_u, columns=['userId', 'precision_user', 'recall_user'])
dataframe_final.head()

Unnamed: 0,userId,precision_user,recall_user
0,1,0.9,0.9
1,2,0.7,0.7
2,3,1.0,1.0
3,4,1.0,1.0
4,5,0.3,0.3


In [24]:
# top 10 recommended titles stored for the user with userId 1
n_top_recommendation_user[1]

["Schindler's List (1993)",
 'Wizard of Oz, The (1939)',
 'Wallace & Gromit: The Best of Aardman Animation (1996)',
 'To Kill a Mockingbird (1962)',
 "One Flew Over the Cuckoo's Nest (1975)",
 'Star Wars: Episode IV - A New Hope (1977)',
 'Apollo 13 (1995)',
 'Big (1988)',
 'Erin Brockovich (2000)',
 'Secret Garden, The (1993)']

In [25]:
# Summary metrics for the selected algorithm: root mean squared error, mean
# absolute error, average precision, average recall and F-measure.
actual_ratings = predicted_dataframe['true_rating']
estimated_ratings = predicted_dataframe['predicted_rating']

# Root mean squared error: square root of the mean squared error
root_mean_square_err = sqrt(mean_squared_error(actual_ratings, estimated_ratings))
print('Root Mean Squared error of ' + alg + ' is:', root_mean_square_err)

# Mean absolute error
mean_abs_err = mean_absolute_error(actual_ratings, estimated_ratings)
print('Mean absolute error of ' + alg + ' is:', mean_abs_err)

# Precision: mean of the per-user precision values
avg_precision = dataframe_final['precision_user'].mean()
print('Average precision of '+ alg + ' is:', avg_precision)

# Recall: mean of the per-user recall values
avg_recall = dataframe_final['recall_user'].mean()
print('Average recall of '+ alg +' is:', avg_recall)

# F-measure: harmonic mean of the average precision and the average recall
mul = 2.0 * avg_precision * avg_recall
precision_recall_sum = avg_precision + avg_recall
F_measure = mul / precision_recall_sum
print('F-measure of '+ alg + ' is:', F_measure)

Root Mean Squared error of KNNBaseline is: 0.8941181958617168
Mean absolute error of KNNBaseline is: 0.7053814616839117
Average precision of KNNBaseline is: 0.6778973509933801
Average recall of KNNBaseline is: 0.6778973509933801
F-measure of KNNBaseline is: 0.6778973509933801
