### Simple collaborative filtering algorithm

In [1]:
import math
import pandas as pd
import operator

In [2]:
# Hand-built sample data: each user maps to the movies they rated and the
# score they gave. Insertion order matters to the helpers that iterate it.
review = {
    'User 1': {
        'Avatar': 4.0,
        'The Godfather': 5.0,
        'The Godfather Part II': 4.29,
        'Apocalypse Now': 5.0,
        'Jaws': 1.0,
        'The Notebook': 0.5,
    },
    'User 2': {
        'The Shawshank Redemption': 4.89,
        'The Shining': 4.93,
        'The Green Mile': 4.87,
        'The Godfather': 1.33,
    },
    'User 3': {
        'Raiders of the Lost Ark': 5.0,
        'Jaws': 4.89,
        'Saving Private Ryan': 4.78,
        'Star Wars Episode IV - A New Hope': 4.33,
        'Close Encounters of the Third Kind': 4.77,
        'The Godfather': 1.25,
        'The Notebook': 5,
        'The Apartment': 4.8,
        'The Godfather Part II': 1.72,
    },
    'User 4': {
        'Star Wars Episode IV - A New Hope': 5.0,
        'The Notebook': 3,
    },
    'User 5': {
        'The Godfather': 4.02,
        'The Godfather Part II': 5.0,
        'Avatar': 2,
    },
    'User 6': {
        'The Godfather': 3.07,
        'The Godfather Part II': 4.29,
        'Raging Bull': 5.0,
        'The Notebook': 1.0,
        'Goodfellas': 4.89,
    },
    'User 7': {
        'The Godfather': 3.8,
        'The Godfather Part II': 3.61,
        'Apocalypse Now': 4.26,
    },
    'User 8': {
        'The Shining': 5.0,
        'One Flew Over The Cuckoos Nest': 5.0,
        'The Godfather': 2.22,
        'The Godfather Part II': 3.34,
    },
    'User 9': {
        'The Shawshank Redemption': 4.98,
        'The Shining': 4.42,
        'Apocalypse Now': 1.63,
        'The Godfather': 1.12,
        'The Godfather Part II': 2.16,
        'Avatar': 4.99,
    },
    'User 10': {
        'Raiders of the Lost Ark': 5.0,
        'Star Wars Episode IV - A New Hope': 4.84,
    },
    'User 11': {
        'Saving Private Ryan': 3.78,
        'The Green Mile': 4.96,
        'The Godfather': 1.04,
        'The Godfather Part II': 1.03,
    },
    'User 12': {
        'The Godfather': 5.0,
        'The Godfather Part II': 5.0,
        'Jaws': 1.24,
        'One Flew Over The Cuckoos Nest': 2.02,
    },
    'User 13': {
        'Raging Bull': 5.0,
        'Goodfellas': 4.87,
        'Close Encounters of the Third Kind': 1.14,
        'The Godfather': 4.0,
    },
    'User 14': {
        'The Godfather': 1.98,
        'The Godfather Part II': 1.93,
        'Close Encounters of the Third Kind': 1.37,
    },
    'User 15': {
        'Jaws': 5.0,
        'Sex in the City': 4.8,
        'The Notebook': 4.5,
        'Avatar': 4.8,
        'Close Encounters of the Third Kind': 2.0,
        'The Godfather': 1.07,
        'The Godfather Part II': 0.63,
    },
    'User 16': {
        'Raging Bull': 4.89,
        'Goodfellas': 5.0,
        'The Godfather': 4.87,
        'Star Wars Episode IV - A New Hope': 1.32,
    },
}

In [3]:
# Would User 14 like The Notebook?

In [4]:
def get_common_movies(userA, userB):
    """Return the movies rated by both users.

    Titles are listed in the order they appear in ``review[userA]``.
    Raises ``KeyError`` if a looked-up user is not a key of ``review``.
    """
    shared_titles = []
    for title in review[userA]:
        if title in review[userB]:
            shared_titles.append(title)
    return shared_titles

In [5]:
get_common_movies('User 2','User 3')

['The Godfather']

In [6]:
def get_reviews(userA, userB):
    """Pair up the two users' ratings for every movie they both rated.

    Returns a list of ``(rating_by_userA, rating_by_userB)`` tuples, one per
    movie returned by ``get_common_movies(userA, userB)`` and in that order.
    """
    rating_pairs = []
    for title in get_common_movies(userA, userB):
        rating_pairs.append((review[userA][title], review[userB][title]))
    return rating_pairs

In [7]:
get_reviews('User 1','User 3')

[(5.0, 1.25), (4.29, 1.72), (1.0, 4.89), (0.5, 5)]

## Euclidean Distance Formula for Calculating similarity
$d(x,y)=\sqrt{(x_1-y_1)^2 + (x_2-y_2)^2 + \cdots + (x_n-y_n)^2}$

In [8]:
def euclidean_distance(points):
    """Return the Euclidean distance described by a sequence of value pairs.

    Each element of ``points`` is indexed at positions 0 and 1; the result is
    the square root of the summed squared differences of those two values.
    An empty sequence gives ``0.0``.
    """
    return math.sqrt(sum((pair[0] - pair[1]) ** 2 for pair in points))

In [9]:
def similarity(reviews):
    """Turn the Euclidean distance between paired ratings into a similarity.

    The score is ``1 / (1 + distance)``: the smaller the distance, the closer
    the score gets to 1. Adding 1 keeps the denominator non-zero when the
    ratings are identical (distance 0).

    Note that an empty ``reviews`` list also has distance 0 and therefore
    scores 1.0.
    """
    distance = euclidean_distance(reviews)
    return 1 / (distance + 1)

In [10]:
def get_critic_similarity(userA, userB):
    """Return the similarity score of two users.

    The score is ``similarity`` applied to the rating pairs that
    ``get_reviews`` builds from the movies both users rated.
    """
    return similarity(get_reviews(userA, userB))

In [11]:
get_critic_similarity('User 1','User 2')

0.21413276231263384

In [14]:
def recommend_movies(critic, num_suggestions):
    """Recommend movies ``critic`` has not rated, using similar users' ratings.

    The other users are ranked by ``get_critic_similarity`` and the
    ``num_suggestions`` most similar ones are kept as neighbours. For every
    movie a neighbour rated and ``critic`` did not, the predicted score is the
    similarity-weighted average of the neighbours' ratings.

    Parameters:
        critic: key of ``review`` identifying the user to recommend for.
        num_suggestions: how many of the most similar users to consult.
            Despite the name, this caps the number of neighbours, not the
            length of the returned list.

    Returns:
        A list of ``(movie, predicted_score)`` tuples sorted by predicted
        score, highest first.

    Raises:
        KeyError: if ``critic`` is not a key of ``review``.
    """
    # Users with no movie in common with the critic are left out. With nothing
    # to compare, their distance is 0 and their similarity is the maximum 1.0,
    # which would wrongly rank complete strangers as the best neighbours.
    neighbours = sorted(
        (
            (get_critic_similarity(critic, other), other)
            for other in review
            if other != critic and get_common_movies(critic, other)
        ),
        reverse=True,
    )[:num_suggestions]

    already_rated = review[critic]

    # movie -> (sum of neighbour similarities, list of similarity-weighted ratings)
    totals = {}
    for score, other in neighbours:
        for movie, rating in review[other].items():
            if movie in already_rated:
                continue
            weighted_rating = score * rating
            if movie in totals:
                score_sum, weighted_ratings = totals[movie]
                weighted_ratings.append(weighted_rating)
                totals[movie] = (score_sum + score, weighted_ratings)
            else:
                totals[movie] = (score, [weighted_rating])

    # Normalise by the summed similarities so a movie rated by many neighbours
    # is not favoured merely for having more contributions.
    predicted = {
        movie: sum(weighted_ratings) / score_sum
        for movie, (score_sum, weighted_ratings) in totals.items()
    }

    return sorted(predicted.items(), key=operator.itemgetter(1), reverse=True)

In [16]:
recommend_movies('User 16',4)

[('Apocalypse Now', 5.000000000000001),
 ('The Godfather Part II', 4.7280538302277435),
 ('Avatar', 3.2416107382550337),
 ('One Flew Over The Cuckoos Nest', 2.02),
 ('Close Encounters of the Third Kind', 1.14),
 ('Jaws', 1.12),
 ('The Notebook', 0.5)]

In [None]:
#EOF