In [1]:
def convert_int(x):
    """Coerce an id value to ``int``, or return ``np.nan`` if it cannot be converted.

    Used to clean id columns so that every entry is either an integer or a
    null marker.

    Parameters
    ----------
    x : object
        Value to convert (anything ``int()`` accepts, or arbitrary junk).

    Returns
    -------
    int or float
        ``int(x)`` on success, ``np.nan`` when ``int(x)`` fails.
    """
    try:
        return int(x)
    # Only the failures int() raises for bad data are treated as "missing":
    #   TypeError     -> unsupported type (e.g. None, list)
    #   ValueError    -> unparsable string, or float NaN
    #   OverflowError -> float infinity
    # A bare ``except`` would also swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return np.nan

In [2]:
def predict(ratings, similarity, type='user'):
    """Predict ratings from a rating matrix and a similarity matrix.

    Parameters
    ----------
    ratings : numpy.ndarray
        2-D rating matrix. The 'user' branch averages over axis 1 and indexes
        the result with ``[:, np.newaxis]``, so a NumPy array is expected
        (presumably users x items -- confirm against the caller).
    similarity : numpy.ndarray
        Square similarity matrix: user-user for ``type='user'``,
        item-item for ``type='item'``.
    type : {'user', 'item'}, default 'user'
        Which flavour of collaborative filtering to apply.

    Returns
    -------
    numpy.ndarray
        Predicted ratings, same shape as ``ratings``.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'user'`` nor ``'item'``.
    """
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)
        # np.newaxis turns the per-row means into a column vector so they
        # broadcast across every column of ``ratings``.
        ratings_diff = ratings - mean_user_rating[:, np.newaxis]
        # Similarity-weighted average of mean-centred ratings, added back to
        # each row's own mean; normalised by the row's total |similarity|.
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array(
            [np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        # Row sums are used as the per-column normaliser; this equals the
        # column sums only when ``similarity`` is symmetric.
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    else:
        # Previously an unknown ``type`` fell through to ``return pred`` and
        # failed with an unhelpful UnboundLocalError.
        raise ValueError("type must be 'user' or 'item', got {!r}".format(type))
    return pred


In [3]:
#User-based CF model#
def get_recommendations_User_based_CF(user_id):
    """Recommend movie ids for ``user_id`` using user-based collaborative filtering.

    Computes the cosine similarity between all rows of the module-level
    ``pivot_df``, takes the most similar other users, and returns the union of
    the movies those users rated above 4.0.

    Parameters
    ----------
    user_id : int
        1-based user id; row ``user_id - 1`` of the similarity matrix is used.

    Returns
    -------
    list
        1-based movie ids (unordered, de-duplicated) rated above 4.0 by at
        least one of the nearest neighbours. The list is NOT truncated to 10
        entries, despite the printed banner.

    Raises
    ------
    IndexError
        If ``user_id`` does not map to a row of the similarity matrix.
    """
    print('User-based collaborative filtering\n')
    cosine_similarity_users = cosine_similarity(pivot_df, pivot_df)
    print('cosine_similarity_users.shape\n', cosine_similarity_users.shape)

    user_index = user_id - 1
    n_users = cosine_similarity_users.shape[0]
    # Without this check a user_id <= 0 silently wraps around (negative
    # indexing) and returns recommendations for a different user.
    if not 0 <= user_index < n_users:
        raise IndexError(
            'user_id {} is out of range; expected 1..{}'.format(user_id, n_users))

    # The neighbourhood is the 50 most similar users *including* the user
    # themself, i.e. 49 real neighbours.
    neighbourhood_size = 50
    ranked_indices = cosine_similarity_users[user_index].argsort()[::-1]
    # Exclude the user explicitly by index. Dropping "the first entry" is only
    # right when the user ranks strictly first, which ties do not guarantee.
    similar_user_indices = [
        int(idx) for idx in ranked_indices if idx != user_index
    ][:neighbourhood_size - 1]

    print('\nTop 10 recommended movies for user_id {} are below:\n'.format(user_id))
    recommendations = set()
    for neighbour_index in similar_user_indices:
        # NOTE(review): ``pivot_df[i]`` selects row i only if pivot_df is a
        # NumPy array; on a pandas DataFrame it would select a column -- confirm.
        movie_indices = np.where(pivot_df[neighbour_index] > 4.0)
        movie_ids = np.array(movie_indices) + 1  # back to 1-based movie ids
        recommendations.update(movie_ids.flatten())

    return list(recommendations)

In [4]:
# Model Evaluation #
def model_evaluation(user_id, rec_movie_titles_list):
    """Print the recall of a recommendation list for one user.

    Looks the user up in the module-level ``user_rated_testdata`` frame (columns
    ``user_id``, ``rating`` and ``movie_title`` are read), treats movies rated
    4.0 or higher as "liked", and reports what percentage of those liked
    movies appear in ``rec_movie_titles_list``.

    Parameters
    ----------
    user_id : int
        Id matched against the ``user_id`` column.
    rec_movie_titles_list : list
        Movie titles recommended by the model.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Relevant movies for the user = 'user_viewed'
    user_viewed = user_rated_testdata.loc[
        user_rated_testdata['user_id'] == user_id
    ].sort_values(['rating'], ascending=False)
    print('Total no. of movies user viewed: ', len(user_viewed))

    # Among movies viewed by the user, which are highly rated (liked) = 'user_rated_high'
    user_rated_high = user_viewed.loc[user_viewed['rating'] >= 4.0]
    print('Among the movies user viewed, total no. of movies user rated high(liked): ', len(user_rated_high))

    model_recommended = user_rated_high.loc[user_rated_high['movie_title'].isin(rec_movie_titles_list)]
    print('Among the movies user viewed & rated high(liked), total no. of movies model recommended: ', len(model_recommended))

    # A user with no liked movies in the test data has nothing to recall;
    # report 0.0 instead of raising ZeroDivisionError.
    if len(user_rated_high) > 0:
        recall = round((len(model_recommended) / len(user_rated_high)) * 100, 2)
    else:
        recall = 0.0
    print('\n******RECALL******')
    print('******Model is able to recommend {}% of movies which are liked(rated high) by the user(user_id:{})******\n'.format(recall, user_id))

In [5]:
def rmse(actual, prediction):
    """Return the root mean squared error between ``actual`` and ``prediction``.

    Delegates the squared-error averaging to ``mean_squared_error`` and takes
    the square root of that value.
    """
    mse = mean_squared_error(actual, prediction)
    return sqrt(mse)