In [15]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
#Creating Dataset
dataset={
        'Firdose': {'Money Heist': 5,
                  'Criminal Justice': 3,
                  'Family-Man': 3,
                  'Sacred Games': 3,
                  'Apharan': 2,
                  'Mirzapur': 3},
    
        'Mahi': {'Money Heist': 5,
                    'Criminal Justice': 3,
                    'Sacred Games': 5,
                    'Family-Man':5,
                    'Mirzapur': 3,
                    'Apharan': 3},
    
        'Bindu': {'Money Heist': 2,
                   'Family-Man': 5,
                   'Sacred Games': 3,
                   'Mirzapur': 4},
    
        'Sajid': {'Family-Man': 5,
                   'Mirzapur': 4,
                   'Sacred Games': 4,},
    
       'Ishita': {'Money Heist': 4,
                    'Criminal Justice': 4,
                    'Family-Man': 4,
                    'Mirzapur': 3,
                    'Apharan': 2},
    
       'Sruthi': {'Money Heist': 3,
                  'Family-Man': 4,
                  'Mirzapur': 3,
                  'Sacred Games': 5,
                  'Apharan': 3},
    
      'Hani': {'Family-Man':4,
                  'Apharan':1,
                  'Sacred Games':4}}

In [19]:
#create a data frame of this dataset
dataset_df=pd.DataFrame(dataset)
dataset_df.fillna("Not Watched",inplace=True)
dataset_df

Unnamed: 0,Firdose,Mahi,Bindu,Sajid,Ishita,Sruthi,Hani
Money Heist,5,5,2.0,Not Watched,4.0,3.0,Not Watched
Criminal Justice,3,3,Not Watched,Not Watched,4.0,Not Watched,Not Watched
Family-Man,3,5,5.0,5.0,4.0,4.0,4.0
Sacred Games,3,5,3.0,4.0,Not Watched,5.0,4.0
Apharan,2,3,Not Watched,Not Watched,2.0,3.0,1.0
Mirzapur,3,3,4.0,4.0,3.0,3.0,Not Watched


In [20]:
# custom function to create unique set of web series

def unique_items():
    unique_items_list = []
    for person in dataset.keys():
        for items in dataset[person]:
            unique_items_list.append(items)
    s=set(unique_items_list)
    unique_items_list=list(s)
    return unique_items_list
unique_items()

['Apharan',
 'Family-Man',
 'Mirzapur',
 'Criminal Justice',
 'Money Heist',
 'Sacred Games']

In [21]:
# custom function to implement cosine similarity between two items i.e. web series

def item_similarity(item1,item2):
    both_rated = {}
    for person in dataset.keys():
        if item1 in dataset[person] and item2 in dataset[person]:
            both_rated[person] = [dataset[person][item1],dataset[person][item2]]

    #print(both_rated)
    number_of_ratings = len(both_rated)
    if number_of_ratings == 0:
        return 0

    item1_ratings = [[dataset[k][item1] for k,v in both_rated.items() if item1 in dataset[k] and item2 in dataset[k]]]
    item2_ratings = [[dataset[k][item2] for k, v in both_rated.items() if item1 in dataset[k] and item2 in dataset[k]]]
    #print("{} ratings :: {}".format(item1,item1_ratings))
    #print("{} ratings :: {}".format(item2,item2_ratings))
    cs = cosine_similarity(item1_ratings,item2_ratings)
    return cs[0][0]

In [22]:
print("Cosine Similarity:: ",item_similarity('Money Heist','Sacred Games'))

Cosine Similarity::  0.9319768053120148


In [23]:
#custom function to check most similar items 

def most_similar_items(target_item):
    un_lst=unique_items()
    scores = [(item_similarity(target_item,other_item),target_item+" --> "+other_item) for other_item in un_lst if other_item!=target_item]
    scores.sort(reverse=True)
    return scores

In [32]:
most_similar_items('Sacred Games')

[(0.9749005254295224, 'Sacred Games --> Family-Man'),
 (0.9701425001453319, 'Sacred Games --> Criminal Justice'),
 (0.9630868246861537, 'Sacred Games --> Apharan'),
 (0.9517199077929532, 'Sacred Games --> Mirzapur'),
 (0.9319768053120148, 'Sacred Games --> Money Heist')]

In [24]:
#custom function to filter the seen movies and unseen movies of the target user

def target_movies_to_users(target_person):
    target_person_movie_lst = []
    unique_list =unique_items()
    for movies in dataset[target_person]:
        target_person_movie_lst.append(movies)

    s=set(unique_list)
    recommended_movies=list(s.difference(target_person_movie_lst))
    a = len(recommended_movies)
    if a == 0:
        return 0
    return recommended_movies,target_person_movie_lst

In [29]:
unseen_movies,seen_movies=target_movies_to_users('Hani')

dct = {"Unseen Movies":unseen_movies,"Seen Movies":seen_movies}
pd.DataFrame(dct)


Unnamed: 0,Unseen Movies,Seen Movies
0,Mirzapur,Family-Man
1,Criminal Justice,Apharan
2,Money Heist,Sacred Games


In [26]:
def recommendation_phase(target_person):
    if target_movies_to_users(target_person=target_person) == 0:
        print(target_person, "has seen all the movies")
        return -1
    not_seen_movies,seen_movies=target_movies_to_users(target_person=target_person)
    seen_ratings = [[dataset[target_person][movies],movies] for movies in dataset[target_person]]
    weighted_avg,weighted_sim = 0,0
    rankings =[]
    for i in not_seen_movies:
        for rate,movie in seen_ratings:
            item_sim=item_similarity(i,movie)
            weighted_avg +=(item_sim*rate)
            weighted_sim +=item_sim
        weighted_rank=weighted_avg/weighted_sim
        rankings.append([weighted_rank,i])

    rankings.sort(reverse=True)
    return rankings

In [28]:
print("Enter the target person")
tp = input().title()
if tp in dataset.keys():
    a=recommendation_phase(tp)
    if a != -1:
        print("Recommendation Using Item based Collaborative Filtering:  ")
        for weight,movie in a:
            print(movie, '-->', weight)
else:
    print("Person not found in the dataset..please try again")


Enter the target person
Hani
Recommendation Using Item based Collaborative Filtering:  
Criminal Justice --> 3.001430555956084
Money Heist --> 2.99513159668599
Mirzapur --> 2.99363310192852
