In [1]:
from datetime import datetime
import random

from collections import defaultdict

MOVIE_TOTAL = 10
USER_TOTAL = 10

def generateRandomData():

    movies_list = list(range(1, MOVIE_TOTAL))
    movies_similarity = []

    while len(movies_similarity) < (MOVIE_TOTAL // 1.1):
        random_movie = random.sample(movies_list, k=2)
        if random_movie not in movies_similarity:
            movies_similarity.append(tuple(random_movie))

    # print(movies_similarity)

    users = range(1, USER_TOTAL)

    friends = dict()

    for user_id in users:
        
        friends_num = random.randint(0, USER_TOTAL//1.5)
        friends_sample = random.sample(users, k=friends_num)

        if user_id in friends_sample:
            friends_sample.remove(user_id)

        friends[user_id] = set(friends_sample)


    friends_watched = dict()

    for user_id in users:

        watched_num = random.randint(0, MOVIE_TOTAL - 1)
        friends_watched[user_id] = set(random.sample(movies_list, k=watched_num))

    return movies_list, movies_similarity, friends, friends_watched


"""
So you should return the film with the highest number: F / S, 
where F = number of friends who have seen this movie, 
and S = mean of the number of similar movies seen for each friend.
"""
class FilmFeed:

    def __init__(self, movies, movie_similarity, friends, friends_watched):
        self.movies = movies
        # self.movie_similarity = movie_similarity
        self.friends = friends
        self.friends_watched = friends_watched

        self.__formatSimilarity(movie_similarity)

        # print('movie list', self.movies)
        # print('movie_similarity list', self.movie_similarity)
        # print('friends list', self.friends)
        # print('friends_watched list', self.friends_watched)

        # print('data processed', datetime.now())


    def __formatSimilarity(self, movie_similarity):
        self.movie_similarity = defaultdict(set)

        for pair in movie_similarity:
            self.__buildSimilarity(pair[0], set([pair[1]]))
            self.__buildSimilarity(pair[1], set([pair[0]]))

        # remove movie from its own similar list
        for movie_id in self.movie_similarity:
            if movie_id in self.movie_similarity[movie_id]:
                self.movie_similarity[movie_id].remove(movie_id)


    def __buildSimilarity(self, movie_id, similar_ids):
        # similarity is transitive, 
        # so add other similar movies to the similar movie list
        similar_ids.update(self.movie_similarity[movie_id])
        # update this movie's similar list
        self.movie_similarity[movie_id].update(similar_ids)
        # copy the set to avoid changing set length during iteration 
        related_movies = self.movie_similarity[movie_id].copy()

        for mid in related_movies:
            # when similarity is not fully added to all similar movies similar list,
            # do recursive call
            if not similar_ids.issubset(self.movie_similarity[mid]):
                self.__buildSimilarity(mid, similar_ids)


    def discussability(self, user_id):
        """
        F = number of friends who have seen this movie, 
        """
        discuss = defaultdict(int)

        # user not in the friends list, means user got no friends
        if user_id not in self.friends or len(self.friends[user_id]) == 0:
            return discuss

        for movie_id in self.movies:
            for friend_id in self.friends[user_id]:
                # when movie id in friends' watch history, accumulate discussability
                if movie_id in self.friends_watched[friend_id]:
                    discuss[movie_id] += 1
        
        return discuss


    def uniqueness(self, user_id):
        """
        S = mean of the number of similar movies seen for each friend.
        """
        
        similar_watched_by_friends = defaultdict(float)

        # user has no friends
        if user_id not in self.friends or len(self.friends[user_id]) == 0:
            return similar_watched_by_friends

        total_friends = len(self.friends[user_id])

        # iterate every movie and every friends of this user
        for movie_id in self.movies:
            # total similar movies watched by friends
            similar_watched = 0

            for friend_id in self.friends[user_id]:
                # get similar movies
                similar_movies = self.movie_similarity[movie_id]
                # get friends watched movies
                friends_watched = self.friends_watched[friend_id]
                # get intersection of above two sets
                similar_watched += len(similar_movies & friends_watched)
            # get mean of similar movies watched by user's friends
            similar_watched_by_friends[movie_id] = similar_watched / total_friends

        return similar_watched_by_friends

    def recommend(self, user_id):
        discuss = self.discussability(user_id)
        unique = self.uniqueness(user_id)

        # print('discussability of each moive', discuss)
        # print('uniqueness of each moive', unique)

        # movie_id, discussability
        zero_uniue = (0,0)
        # movie_id, F/S score
        fs = (0,0)

        for movie_id in self.movies:
            # when no firends watch any similar movie
            if unique[movie_id] == 0:
                # we select the movie with largest discussability
                if discuss[movie_id] >= zero_uniue[1]:
                    zero_uniue = (movie_id, discuss[movie_id])
            else:
                fs_score = discuss[movie_id] / unique[movie_id]
                if fs_score > fs[1]:
                    fs = (movie_id, fs_score)
                
        if zero_uniue[0] != 0:
            print('zero uniqueness, recommend', zero_uniue)
            return zero_uniue[1]
        else:
            print('recommend by f/s score', fs)
            return fs[0]


movies, movie_similarity, friends, friends_watched = generateRandomData()

feed = FilmFeed(movies, movie_similarity, friends, friends_watched)

random_user_id = random.randint(1,USER_TOTAL-1)

feed.recommend(random_user_id)

# print('finished', datetime.now())

zero uniqueness, recommend (4, 2)


2