### Recommendation Systems - User Based Collaborative Filtering

<img src='https://buildingrecommenders.files.wordpress.com/2015/11/user-based-similarity-full.png?w=319&h=300'/>

In [1]:
# import necessary libraries

import pandas as pd
from math import sqrt

In [36]:
#dictionary with usernames their watched web series and ratings out of 5

dataset={
        'Tanya': {'Pop-up Shop': 5,
                  'Promotional Event': 3,
                  'Birthday Celebration': 3,
                  'Business Meeting': 3,
                  'Personal Gathering': 2,
                  'Miscellaneous': 3},
    
        'Mohan': {'Pop-up Shop': 5,
                    'Promotional Event': 3,
                    'Business Meeting': 5,
                    'Birthday Celebration':5,
                    'Miscellaneous': 3,
                    'Personal Gathering': 3},
    
        'Tasha': {'Pop-up Shop': 2,
                   'Birthday Celebration': 5,
                   'Business Meeting': 3,
                   'Miscellaneous': 4},
    
        'Nirbhay': {'Birthday Celebration': 5,
                   'Miscellaneous': 4,
                   'Business Meeting': 4,},
    
       'Muskan': {'Pop-up Shop': 4,
                    'Promotional Event': 4,
                    'Birthday Celebration': 4,
                    'Miscellaneous': 3,
                    'Personal Gathering': 2},
    
       'Anshika': {'Pop-up Shop': 3,
                  'Birthday Celebration': 4,
                  'Miscellaneous': 3,
                  'Business Meeting': 5,
                  'Personal Gathering': 3},
    
      'Dhawal': {'Birthday Celebration':4,
                  'Personal Gathering':1,
                  'Business Meeting':4}}

In [35]:
#create a data frame of this dataset

dataset_df=pd.DataFrame(dataset)
dataset_df.fillna("N/A",inplace=True)
dataset_df

  dataset_df.fillna("N/A",inplace=True)


Unnamed: 0,Tanya,Mohan,Tasha,Nirbhay,Muskan,Anshika,Dhawal
Pop-up Shop,5,5,2.0,,4.0,3.0,
Promotional Event,3,3,,,4.0,,
Birthday Celebration,3,5,5.0,5.0,4.0,4.0,4.0
Business Meeting,3,5,3.0,4.0,,5.0,4.0
Personal Gathering,2,3,,,2.0,3.0,1.0
Miscellaneous,3,3,4.0,4.0,3.0,3.0,


In [37]:
# custom function to create unique set of web series

def unique_items():
    unique_items_list = []
    for person in dataset.keys():
        for items in dataset[person]:
            unique_items_list.append(items)
    s=set(unique_items_list)
    unique_items_list=list(s)
    return unique_items_list

In [38]:
unique_items()

['Promotional Event',
 'Miscellaneous',
 'Pop-up Shop',
 'Business Meeting',
 'Birthday Celebration',
 'Personal Gathering']

<img src='https://miro.medium.com/max/2512/1*Xvf2o6kE4VCuueMPikxZ_A.png'/>

In [39]:
# custom function to create pearson correlation method from scratch

def person_corelation(person1,person2):
    both_rated = {}
    for item in dataset[person1]:
        if item in dataset[person2]:
            both_rated[item] = 1

    number_of_ratings = len(both_rated)
    if number_of_ratings == 0:
        return 0

    person1_preferences_sum = sum([dataset[person1][item] for item in both_rated])
    person2_preferences_sum = sum([dataset[person2][item] for item in both_rated])

    # Sum up the squares of preferences of each user
    person1_square_preferences_sum = sum([pow(dataset[person1][item], 2) for item in both_rated])
    person2_square_preferences_sum = sum([pow(dataset[person2][item], 2) for item in both_rated])

    # Sum up the product value of both preferences for each item
    product_sum_of_both_users = sum([dataset[person1][item] * dataset[person2][item] for item in both_rated])

    # Calculate the pearson score
    numerator_value = product_sum_of_both_users - (
    person1_preferences_sum * person2_preferences_sum / number_of_ratings)
    denominator_value = sqrt((person1_square_preferences_sum - pow(person1_preferences_sum, 2) / number_of_ratings) * (
    person2_square_preferences_sum - pow(person2_preferences_sum, 2) / number_of_ratings))
    if denominator_value == 0:
        return 0
    else:
        r = numerator_value / denominator_value
        return r

## Phase 1 - User Similarity by using Pearson Correlation method

In [40]:
# custom function to check most similar users

def most_similar_users(target_person,no_of_users):
    
    # Used list comprehension for finding pearson similarity between users
    scores = [(person_corelation(target_person,other_person),other_person) for other_person in dataset if other_person !=target_person]
    
    #sort the scores in descending order
    scores.sort(reverse=True)
    
    #return the scores between the target person & other persons
    return scores[0:no_of_users]

In [41]:
# function check by input one person name & returns the similarity score 

most_similar_users('Mohan',6)

[(1.0000000000000004, 'Dhawal'),
 (0.6123724356957959, 'Muskan'),
 (0.6123724356957959, 'Anshika'),
 (0.5570860145311555, 'Tanya'),
 (0.49999999999999933, 'Nirbhay'),
 (-0.2581988897471611, 'Tasha')]

In [42]:
#custom function to filter the seen movies and unseen movies of the target user

def target_movies_to_users(target_person):
    target_person_movie_lst = []
    unique_list =unique_items()
    for movies in dataset[target_person]:
        target_person_movie_lst.append(movies)

    s=set(unique_list)
    recommended_movies=list(s.difference(target_person_movie_lst))
    a = len(recommended_movies)
    if a == 0:
        return 0
    return recommended_movies,target_person_movie_lst

In [44]:
# function check

unseen_movies,seen_movies=target_movies_to_users('Nirbhay')

dct = {"Not used for":unseen_movies,"Used for":seen_movies}
pd.DataFrame(dct)

Unnamed: 0,Not used for,Used for
0,Personal Gathering,Birthday Celebration
1,Pop-up Shop,Miscellaneous
2,Promotional Event,Business Meeting


In [53]:
# get stations related to recommendation

stations_recommendations = {
    "Promotional Event": ["Bras Basah", "City Hall"],
    "Personal Gathering": ["Caldecott"],
    "Pop-up Shop": ["Bras Basah", "City Hall", "Caldecott"]
}

## Phase 2: Recommendation Phase - recommending ```web series``` on the behalf of user similarity

In [48]:
def recommendation_phase(person):
    # Gets recommendations for a person by using a weighted average of every other user's rankings
    totals = {}  #empty dictionary
    simSums = {} # empty dictionary
    for other in dataset:
        # don't compare me to myself
        if other == person:
            continue
        sim = person_corelation(person, other)

        # ignore scores of zero or lower
        if sim <= 0:
            continue
        for item in dataset[other]:
            # only score movies i haven't seen yet
            if item not in dataset[person]:
                # Similrity * score
                totals.setdefault(item, 0)
                totals[item] += dataset[other][item] * sim
                # sum of similarities
                simSums.setdefault(item, 0)
                simSums[item] += sim
                # Create the normalized list

    rankings = [(total / simSums[item], item) for item, total in totals.items()]
    rankings.sort(reverse=True)
    # returns the recommended items
    recommendations_list = [(recommend_item,score) for score, recommend_item in rankings]
    return recommendations_list

In [54]:
print("Enter the target person")
tp = input().title()
if tp in dataset.keys():
    a=recommendation_phase(tp)
    if a != -1:
        print("Recommendation Using User based Collaborative Filtering:  ")
        for webseries,weights in a:
            print(webseries,'---->',weights, "---->", stations_recommendations[webseries])
else:
    print("Person not found in the dataset..please try again")

Enter the target person
Recommendation Using User based Collaborative Filtering:  
Promotional Event ----> 3.666666666666667 ----> ['Bras Basah', 'City Hall']
Pop-up Shop ----> 3.479274057836309 ----> ['Bras Basah', 'City Hall', 'Caldecott']
Personal Gathering ----> 2.333333333333333 ----> ['Caldecott']
