# COLLABORATIVE FILTERING
It recommends items based on similarity measures between users and items. The system recommends those items that are preferred by similar categories of users. Collaborative filtering has many advantages:<br>
1. It is content-independent.
2. In CF, people make explicit ratings, so a real quality assessment of items is done.
3. It provides effective recommendations because it is based on users' similarity rather than items' similarity.
(https://www.researchgate.net/publication/324763207_A_Hybrid_Approach_using_Collaborative_filtering_and_Content_based_Filtering_for_Recommender_System)

### 1. Import library

In [1]:
from data import dataset
from math import sqrt

### 2. Steps of Collaborative Filtering

#### 2.1 Calculate the similarity score between two people

In [2]:
def similarity_score(person1, person2):
    """Return a Euclidean-distance-based similarity between two users.

    Only items rated by both users contribute. The sum of squared rating
    differences over those shared items is mapped to a score with
    ``1 / (1 + sqrt(sum))``, so identical ratings on every shared item
    give 1 and larger differences push the score towards 0.

    Args:
        person1: Key of the first user in the module-level ``dataset``.
        person2: Key of the second user in ``dataset``.

    Returns:
        0 when the two users have no rated item in common, otherwise the
        similarity score described above.

    Raises:
        KeyError: If a looked-up user is missing from ``dataset``.
    """
    # Scan *all* of person1's items before deciding whether there is any
    # overlap; deciding inside the loop would give up after the first item.
    squared_differences = [
        pow(dataset[person1][item] - dataset[person2][item], 2)
        for item in dataset[person1]
        if item in dataset[person2]
    ]

    # No item rated by both users: define the similarity as 0.
    if not squared_differences:
        return 0

    return 1 / (1 + sqrt(sum(squared_differences)))

#### 2.2 Calculate the correlation between two people

In [3]:
def person_correlation(person1, person2):
    """Return the Pearson correlation of two users' ratings.

    The correlation is computed only over items that appear in both users'
    entries of the module-level ``dataset``, using the single-pass formula
    built from sums, sums of squares and the sum of products.

    Args:
        person1: Key of the first user in ``dataset``.
        person2: Key of the second user in ``dataset``.

    Returns:
        0 when the users share no item or when the denominator of the
        formula is 0 (e.g. one user gave the same rating to every shared
        item); otherwise the correlation coefficient.
    """
    # Items rated by both users, in the iteration order of person1's ratings.
    shared = [item for item in dataset[person1] if item in dataset[person2]]
    n = len(shared)
    if n == 0:
        return 0

    first = dataset[person1]
    second = dataset[person2]

    # Plain sums, sums of squares and sum of products over the shared items.
    sum1 = sum(first[item] for item in shared)
    sum2 = sum(second[item] for item in shared)
    sum1_sq = sum(first[item] ** 2 for item in shared)
    sum2_sq = sum(second[item] ** 2 for item in shared)
    product_sum = sum(first[item] * second[item] for item in shared)

    # Pearson score: covariance-like numerator over the product of spreads.
    numerator = product_sum - (sum1 * sum2 / n)
    spread1 = sum1_sq - sum1 ** 2 / n
    spread2 = sum2_sq - sum2 ** 2 / n
    denominator = sqrt(spread1 * spread2)

    # A zero denominator means at least one user has no variation in ratings.
    if denominator == 0:
        return 0
    return numerator / denominator

#### 2.3 Find the most similar user

In [12]:
def most_similar_users(person, number_of_users):
    """Return the users whose ratings correlate best with ``person``.

    Every other key of the module-level ``dataset`` is scored with
    ``person_correlation`` and the results are ordered from highest to
    lowest score.

    Args:
        person: Key of the reference user in ``dataset``.
        number_of_users: How many entries to keep from the top of the list.

    Returns:
        A list of ``(score, user)`` tuples, best match first, truncated to
        ``number_of_users`` entries.
    """
    candidates = []
    for other_person in dataset:
        # The reference user is never their own match.
        if other_person != person:
            candidates.append((person_correlation(person, other_person), other_person))

    # Ascending sort followed by a reversal puts the highest score first.
    best_first = sorted(candidates)[::-1]
    return best_first[:number_of_users]

#### 2.4 Give movie recommendations for a specific input user

In [13]:
def user_recommendations(person):
    """Recommend items to ``person`` from similar users' ratings.

    Each other user in the module-level ``dataset`` whose
    ``person_correlation`` with ``person`` is positive contributes their
    ratings, weighted by that correlation. Only items that ``person`` has
    not rated (or has rated 0) are considered. An item's final score is the
    weighted rating total divided by the total of the contributing
    correlations.

    Args:
        person: Key of the user in ``dataset`` to recommend for.

    Returns:
        A tuple ``(items, rankings)`` where ``rankings`` is a list of
        ``(score, item)`` tuples ordered from highest to lowest score and
        ``items`` is the list of item names in that same order.
    """
    weighted_totals = {}
    similarity_totals = {}

    for other in dataset:
        # Never compare the user against themselves.
        if other == person:
            continue

        sim = person_correlation(person, other)
        # Users with zero or negative correlation are ignored entirely.
        if sim <= 0:
            continue

        for item in dataset[other]:
            # Skip anything the user has already given a non-zero rating.
            if item in dataset[person] and dataset[person][item] != 0:
                continue
            # Accumulate rating * similarity and the similarity itself.
            weighted_totals[item] = weighted_totals.get(item, 0) + dataset[other][item] * sim
            similarity_totals[item] = similarity_totals.get(item, 0) + sim

    # Normalise each total by its similarity sum, then order best-first.
    rankings = sorted(
        (total / similarity_totals[item], item)
        for item, total in weighted_totals.items()
    )[::-1]
    recommended_items = [item for _, item in rankings]

    return recommended_items, rankings

### 3. Input the name of the user

In [17]:
# Movie recommendation for the user name typed at the prompt.
# user_recommendations returns a (titles, scored rankings) tuple, so the
# whole tuple is printed, not just the list of titles.
name = input('Input user name : ')
print ('Recommended film : ', user_recommendations(name))

Input user name : Hania
Recommended film :  (['Avengers: End Game', 'Spiderman: Far From Home', 'Captain Marvel', 'The Lion King', 'Aladdin'], [(2.188279355216407, 'Avengers: End Game'), (0.5623441289567186, 'Spiderman: Far From Home'), (0.421758096717539, 'Captain Marvel'), (0.0, 'The Lion King'), (0.0, 'Aladdin')])


In [16]:
# Correlation (person_correlation) between two user names typed at the prompts.
name1 = input('Input user name 1 : ')
name2 = input('Input user name 2 : ')
print ('The person correlation : ', person_correlation(name1, name2)) # Author's note: more about whether the films watched are the same or not

Input user name 1 : Hania
Input user name 2 : Marina
The person correlation :  -0.30952380952380953


In [18]:
# Find friends to watch together: the users most similar to the one entered.
names = input('Input user name : ')
num = input('Input the number of friends you want : ')
# Pass the name read in this cell; relying on a `name` variable left over from
# an earlier cell would silently answer for a different user.
print ('The most similar user : ', most_similar_users(names, int(num)))

Input user name : Hania
Input the number of friends you want : 2
[(0.4879500364742666, 'Indra 1991 SM'), (0.4510968544481586, 'faizah')]


In [19]:
# Euclidean-distance similarity score (similarity_score) between the ratings
# of two user names typed at the prompts.
name1 = input('Input user name 1 : ')
name2 = input('Input user name 2 : ')
print (similarity_score(name1, name2)) # Author's note: more about how similar the two people's ratings are

Input user name 1 : Hania
Input user name 2 : Genjeh
0.08365085000086825


---------