In [1]:
from collections import defaultdict
import pandas as pd
from surprise import Reader, Dataset
from surprise import KNNWithMeans, KNNBasic, SVD
from surprise import accuracy
from surprise.model_selection import train_test_split, cross_validate

import matplotlib.pyplot as plt
import numpy as np
import json

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
class RefinedMyAlgo():
    def __init__(self, rating_data='', data_frame='', movie_data=''):
        if rating_data:
            reader = Reader(line_format='user item rating timestamp', sep=',')
            self.ratings = Dataset.load_from_file(rating_data, reader)
#             self.trainset, self.testset = train_test_split(self.ratings, test_size=0.25)
            self.trainset = self.ratings.build_full_trainset()
            self.sim_options = {'name': 'cosine','user_based': False}
        elif not data_frame.empty:
            reader = Reader(rating_scale=(0, 5))
            self.ratings = Dataset.load_from_df(data_frame[['userId', 'movieId', 'rating']], reader)
            self.trainset = self.ratings.build_full_trainset()
            self.sim_options = {'name': 'cosine','user_based': False}
            
        if movie_data:
            self.movies = pd.read_csv(movie_data, low_memory=False)
            self.movies['year'] = self.movies['title'].apply(lambda x: x[-5:-1])
            self.movies['title'] = self.movies['title'].apply(lambda x: x[:-7])
            self.movies['genres'] = self.movies['genres'].apply(lambda x: x.replace('|',', '))

        
    def set_k(self, k_value=''):
        if k_value:
            algo = KNNWithMeans(k=k_value, sim_options=self.sim_options)
            self.algo = algo
            self.algo.fit(self.trainset)
        else:
            algo = SVD()
            self.algo = algo
            self.algo.fit(self.trainset)
        
        
    def find_best_k(self, k_value=''):
        if k_value:
            print('K = {}'.format(k_value))
            algo = KNNWithMeans(k=k_value, sim_options=self.sim_options)
            return cross_validate(algo, self.ratings, measures=['RMSE', 'MAE'], cv=10, verbose=True)
        else:
            aux = []
            for k_value in [3, 5, 7, 10, 15, 20, 30, 40]:
                print('K = {}'.format(k_value))
                algo = KNNWithMeans(k=k_value, sim_options=self.sim_options)
                my_dict = cross_validate(algo, self.ratings, measures=['RMSE', 'MAE'], cv=10, verbose=False)
                my_dict['k_value'] = k_value
                aux.append(my_dict)
            return aux
    
    
    def set_testset(self, users):
        if users:
            user_ratings = self.trainset.ur
            movies_ids = list(self.movies['movieId'])
            global_mean=self.trainset.global_mean
            my_testset = []
            
            for user in users:
                iuid = self.trainset.to_inner_uid(str(user))
                for movie in movies_ids:
                    is_in = False
                    for rating in user_ratings[iuid]:
#                         print( 'MOVIE: {}, RATING: {}'.format(movie,bla.trainset.to_raw_iid(rating[0])) )
                        if int(movie) == int(self.trainset.to_raw_iid(int(rating[0]))):
                            is_in = True
                            break
                    if not is_in:
                        my_tuple = (str(user),str(movie),global_mean)
                        my_testset.append(my_tuple)
                        
            self.testset = my_testset
        else:
            testset = self.trainset.build_anti_testset()
            self.testset = testset
        return self.testset


    def predict_ratings(self,users=''):
        # # Predict ratings for all pairs (u, i) that are NOT in the training set.
#         testset = self.trainset.build_anti_testset()
#         self.testset = testset
        testset = self.set_testset(users)
        predictions = self.algo.test(testset)
        self.predictions = predictions
        
        
    def set_perfil_movies(self, users):
        metadata = pd.read_csv('ml-latest-small/ratings.csv', low_memory=False, names=['userId', 'movieId', 'rating','timestamp'])
        metadata = metadata.drop(columns="timestamp")

        metadata_filtered = metadata[metadata.userId.isin(users)]

        self.group_sparse_mtx = pd.pivot_table(metadata_filtered, values='rating', index=['userId'], columns=['movieId'], fill_value=0)
        
        self.perfil_movies = list(self.group_sparse_mtx)
        
    
    ### You must call self.set_perfil_movies() before
    def set_candidate_movies(self):
        candidate_movies = []
        for item in refinedMyAlgo.movies.iterrows():
        #     get the movieId of each movie in movies dataframe
            if item[1].values[0] not in self.perfil_movies:
                candidate_movies.append(item[1].values[0])
        self.candidate_movies = candidate_movies
        
        
    def calc_similarity_matrix(self):
        #Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
        tfidf = TfidfVectorizer(stop_words='english')
        
        #Replace NaN with an empty string
        self.movies['title'] = self.movies['title'].fillna('')
        self.movies['genres'] = self.movies['genres'].fillna('')
        
        #Construct the required TF-IDF matrix by fitting and transforming the data
        tfidf_matrix_title = tfidf.fit_transform(self.movies['title'])
        tfidf_matrix_genres = tfidf.fit_transform(self.movies['genres'])
        
        #Compute the cosine similarity matrix
        self.cosine_sim_movies_title = cosine_similarity(tfidf_matrix_title, tfidf_matrix_title)
        self.cosine_sim_movies_genres = cosine_similarity(tfidf_matrix_genres, tfidf_matrix_genres)
        
        
    def get_similar_movies(self, references, title_weight=0.5):
        recs = []
        for movie in references:
            # Get the pairwsie similarity scores of all movies with that movie
            movie_idx = int(self.movies[self.movies['movieId']==movie['movieID']].index[0])
            sim_scores_title = list(enumerate(self.cosine_sim_movies_title[movie_idx]))
            sim_scores_genres = list(enumerate(self.cosine_sim_movies_genres[movie_idx]))
            
            # Calculate total similarity based on title and genres
            total_sim_score = []
            for i in range(len(sim_scores_title)):
                print("sim_score_title= {}\t sim_score_genres= {}".format(sim_scores_title[i][1], sim_scores_genres[i][1]))
                aux = (sim_scores_title[i][1]*title_weight) + (sim_scores_genres[i][1]*(1-title_weight))
                total_sim_score.append((i, aux))
                print("sim_score_total= {}".format(total_sim_score))
                
            # Sort the movies based on the similarity scores
            total_sim_score = sorted(total_sim_score, key=lambda x: x[1], reverse=True)
            self.total_sim_score = total_sim_score
            
            candidates_sim_score = []
            for item in total_sim_score:
                if self.movies.loc[item[0]].values[0] not in self.perfil_movies:
                    candidates_sim_score.append(item)
            
            # Get the scores of the 10 most similar movies
            candidates_sim_score = candidates_sim_score[1:11]
            
            recs.append(candidates_sim_score)
            
        return recs
    
    
    def get_relevance_score(self, recs, references):
        count = 0
        recs_dict = []
        for reference in references:
        #     print('Referência: {}\t gêneros: {}'.format(refinedMyAlgo.movies[refinedMyAlgo.movies['movieId']==reference['movieID']].values[0][1], refinedMyAlgo.movies[refinedMyAlgo.movies['movieId']==reference['movieID']].values[0][2]))

            for movie in recs[count]:
                aux = {}

                movie_id = self.movies.loc[movie[0]].values[0]
                movie_title = self.movies.loc[movie[0]].values[1]
                movie_genres = self.movies.loc[movie[0]].values[2]
                movie_similarity = movie[1]
                movie_relevance = ((reference['rating']/5.0)+movie_similarity)/2

                aux['movie_id'] = movie_id
                aux['movie_title'] = movie_title
                aux['movie_genres'] = movie_genres
                aux['movie_similarity'] = movie_similarity
                aux['movie_relevance'] = movie_relevance

                recs_dict.append(aux)

        #         print('\tSim: {},\trelevance: {},\tmovieId: {},\ttitle: {}'.format(aux['movie_similarity'], aux['movie_relevance'], aux['movie_id'], aux['movie_title']))

            count=count+1

        recs_dict = sorted(recs_dict, key = lambda i: i['movie_relevance'],reverse=True)

        return recs_dict
    
    
    def calc_distance_item_in_list(self, item, this_list, title_weight=0.5):

        idx_i = int(self.movies[self.movies['movieId']==item['movie_id']].index[0])

        total_dist = 0
        for movie in this_list:
            
            idx_j = int(self.movies[self.movies['movieId']==int(movie['movie_id'])].index[0])

            sim_i_j = (self.cosine_sim_movies_title[idx_i][idx_j]*title_weight) + (self.cosine_sim_movies_genres[idx_i][idx_j]*(1-title_weight))
            dist_i_j = 1 - sim_i_j
            total_dist = total_dist + dist_i_j

        result = total_dist/len(this_list)
        return result
    
    
    def calc_diversity_score(self, actual_list, candidates_list, alfa=0.5):
        '''
        This function implemented here was based on MARIUS KAMINSKAS and DEREK BRIDGE paper: Diversity, Serendipity, Novelty, and Coverage: A Survey and Empirical Analysis of Beyond-Accuracy Objectives in Recommender Systems
        func(i,R) = (relevance[i]*alfa) + (dist_i_R(i,R)*(1-alfa))
        '''
        diversity_score = []
        count = 0

        for item in candidates_list:

            aux = {}
            dist_item_R = self.calc_distance_item_in_list(item=item, this_list=actual_list)
            aux['div_score'] = (item['movie_relevance']*alfa) + (dist_item_R*(1-alfa))
            aux['idx'] = count
            diversity_score.append(aux)
            count = count + 1

        return diversity_score
    
    
    def diversify_recs_list(self, recs, n=10):
        '''
        This function implemented here was based on MARIUS KAMINSKAS and DEREK BRIDGE paper: Diversity, Serendipity, Novelty, and Coverage: A Survey and Empirical Analysis of Beyond-Accuracy Objectives in Recommender Systems
        The Greedy Reranking Algorithm.
        '''
        diversified_list = []
        
        while len(diversified_list) < n:
            if len(diversified_list) == 0:
                diversified_list.append(recs[0])
                recs.pop(0)
            else:
                diversity_score = self.calc_diversity_score(actual_list=diversified_list, candidates_list=recs)
                diversity_score = sorted(diversity_score, key = lambda i: i['div_score'],reverse=True)
#               #  Add the item that maximize diversity in the list 
                item = diversity_score[0]
                diversified_list.append(recs[item['idx']])
#               #  Remove this item from the candidates list
                recs.pop(item['idx'])
    
        return diversified_list
                

In [3]:
refinedMyAlgo = RefinedMyAlgo(rating_data='ml-latest-small/ratings.csv', movie_data='ml-latest-small/movies.csv')
refinedMyAlgo.set_k()

In [4]:
my_users = [77,596,452,243,420]

refinedMyAlgo.predict_ratings(users=my_users)
len(refinedMyAlgo.predictions)

47891

In [6]:
# metadata = pd.read_csv('ml-latest-small/ratings.csv', low_memory=False, names=['userId', 'movieId', 'rating','timestamp'])
# metadata = metadata.drop(columns="timestamp")

# metadata_filtered = metadata[metadata.userId.isin(my_users)]

# my_group_sparse = pd.pivot_table(metadata_filtered, values='rating', index=['userId'], columns=['movieId'], fill_value=0)
# my_group_sparse.head()

refinedMyAlgo.set_perfil_movies(users=my_users)
refinedMyAlgo.set_candidate_movies()

# print(refinedMyAlgo.perfil_movies)
# print(refinedMyAlgo.candidate_movies)
refinedMyAlgo.group_sparse_mtx.head()

movieId,1,10,32,34,36,39,44,47,48,50,...,176101,177763,178615,179401,179819,181719,182793,183635,184997,188301
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
77,0,0,0.0,0,0,0,0,0,0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0
243,0,5,0.0,0,4,0,4,0,4,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0
420,4,0,3.5,0,0,0,0,0,0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0
452,0,4,0.0,0,0,0,4,5,0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0
596,4,0,3.5,4,0,4,0,0,0,3.5,...,2.5,3.5,3.5,4,3.5,3.5,3.5,3.5,4,4


In [7]:
group_filled_mtx = refinedMyAlgo.group_sparse_mtx.copy()

for index, row in group_filled_mtx.iterrows():
    for col in list(group_filled_mtx):
        if(group_filled_mtx.loc[index,col] == 0.0):
            aux = list(filter(lambda x: x.uid==str(index) and x.iid==str(col), refinedMyAlgo.predictions))
            group_filled_mtx.loc[index,col] = aux[0].est

group_filled_mtx.head()

movieId,1,10,32,34,36,39,44,47,48,50,...,176101,177763,178615,179401,179819,181719,182793,183635,184997,188301
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
77,3.859585,3.434366,4.002321,4.064333,4.06631,3.246319,2.950969,3.862087,3.29739,4.122691,...,3.459133,3.730887,3.65234,3.624485,3.338429,3.981289,3.432812,3.492644,3.504513,3.78251
243,4.170347,5.0,4.667564,4.192077,4.0,4.200058,4.0,4.861502,4.0,4.786181,...,3.907284,4.270191,4.184586,4.339069,4.063219,4.49772,4.034645,4.365129,4.341662,4.467247
420,4.0,3.56962,3.5,3.546648,3.969556,3.600434,2.75984,3.976766,3.259909,4.173383,...,3.390765,3.455761,3.63657,3.613236,3.361072,3.594036,3.515131,3.60085,3.827188,4.046915
452,4.891613,4.0,4.68235,4.720381,4.728214,4.667525,4.0,5.0,4.305875,5.0,...,4.286184,4.492363,4.386362,4.51913,4.137538,4.574832,4.473645,4.441904,4.663808,4.691109
596,4.0,3.45619,3.5,4.0,3.409371,4.0,2.475122,4.053378,3.306434,3.5,...,2.5,3.5,3.5,4.0,3.5,3.5,3.5,3.5,4.0,4.0


In [8]:
########################################################################
# # Implementing least misery ending-up in a dataframe
########################################################################
values = []
labels = []
for i in range(0,len(list(group_filled_mtx))):
    my_col = group_filled_mtx.iloc[ : ,i]
    label = my_col.name
    my_col = list(my_col)
    
    labels.append(label)
    values.append( float(min(my_col)) )
    
# print('Array values: {}, Array labels: {}'.format(values, labels))
agg_group_perf = pd.DataFrame(index=[900], columns=labels)

for i in range(0,len(list(agg_group_perf))):
    agg_group_perf.iloc[0, i] = values[i]

agg_group_perf.head()

Unnamed: 0,1,10,32,34,36,39,44,47,48,50,...,176101,177763,178615,179401,179819,181719,182793,183635,184997,188301
900,3.85958,3.43437,3.5,3.54665,3.40937,3.24632,2.47512,3.86209,3.25991,3.5,...,2.5,3.45576,3.5,3.61324,3.33843,3.5,3.43281,3.49264,3.50451,3.78251


In [9]:
group_pref_dict = []
for col in list(agg_group_perf):
    my_dict = {}
#     print('Valor: {}, Coluna: {}'.format(agg_group_perf.loc[900,col], col))
    my_dict['rating'] = agg_group_perf.loc[900,col]
    my_dict['movieID'] = col
    group_pref_dict.append(my_dict)
    
group_pref_dict = sorted(group_pref_dict, key = lambda i: i['rating'],reverse=True)
group_pref_dict

[{'rating': 4.327000304811133, 'movieID': 38061},
 {'rating': 4.308348486346194, 'movieID': 904},
 {'rating': 4.168392053557412, 'movieID': 5618},
 {'rating': 4.153576854675985, 'movieID': 31658},
 {'rating': 4.1484416521143475, 'movieID': 57669},
 {'rating': 4.1475717589538945, 'movieID': 2160},
 {'rating': 4.134730610362691, 'movieID': 6350},
 {'rating': 4.128985750361517, 'movieID': 5971},
 {'rating': 4.124521103370452, 'movieID': 318},
 {'rating': 4.084720774637426, 'movieID': 168252},
 {'rating': 4.0766406160669275, 'movieID': 51255},
 {'rating': 4.029465394910068, 'movieID': 6807},
 {'rating': 4.014132395355429, 'movieID': 1079},
 {'rating': 4.011076739460682, 'movieID': 608},
 {'rating': 4.011060011917754, 'movieID': 3000},
 {'rating': 4.0, 'movieID': 260},
 {'rating': 4.0, 'movieID': 750},
 {'rating': 4.0, 'movieID': 1035},
 {'rating': 4.0, 'movieID': 1136},
 {'rating': 4.0, 'movieID': 1193},
 {'rating': 4.0, 'movieID': 1203},
 {'rating': 4.0, 'movieID': 1210},
 {'rating': 4.0,

In [10]:
refinedMyAlgo.calc_similarity_matrix()

In [11]:
references = group_pref_dict[0:10]
# references = group_pref_dict

for item in references:
    print(item)
# print(references)

{'rating': 4.327000304811133, 'movieID': 38061}
{'rating': 4.308348486346194, 'movieID': 904}
{'rating': 4.168392053557412, 'movieID': 5618}
{'rating': 4.153576854675985, 'movieID': 31658}
{'rating': 4.1484416521143475, 'movieID': 57669}
{'rating': 4.1475717589538945, 'movieID': 2160}
{'rating': 4.134730610362691, 'movieID': 6350}
{'rating': 4.128985750361517, 'movieID': 5971}
{'rating': 4.124521103370452, 'movieID': 318}
{'rating': 4.084720774637426, 'movieID': 168252}


In [12]:
# references=[904, 318, 38061, 31658, 57669]
recs = refinedMyAlgo.get_similar_movies(references)

In [13]:
print(refinedMyAlgo.total_sim_score)

[(9463, 1.0), (1904, 0.8174867105895136), (507, 0.5), (595, 0.5), (1056, 0.5), (1261, 0.5), (1767, 0.5), (1906, 0.5), (1907, 0.5), (1908, 0.5), (1909, 0.5), (1921, 0.5), (1987, 0.5), (2113, 0.5), (2114, 0.5), (2683, 0.5), (2760, 0.5), (2843, 0.5), (3369, 0.5), (3598, 0.5), (3694, 0.5), (4459, 0.5), (4469, 0.5), (4685, 0.5), (5080, 0.5), (5273, 0.5), (5582, 0.5), (5604, 0.5), (5997, 0.5), (6770, 0.5), (6932, 0.5), (7268, 0.5), (7985, 0.5), (8424, 0.5), (8456, 0.5), (9150, 0.5), (9403, 0.5), (9687, 0.5), (9722, 0.5), (439, 0.4740052347801208), (1905, 0.4740052347801208), (2001, 0.4740052347801208), (3926, 0.4740052347801208), (8322, 0.4740052347801208), (9065, 0.4740052347801208), (9474, 0.4740052347801208), (285, 0.469920106724225), (1071, 0.469920106724225), (1183, 0.469920106724225), (2038, 0.469920106724225), (3885, 0.469920106724225), (5534, 0.469920106724225), (6726, 0.469920106724225), (7616, 0.469920106724225), (7855, 0.469920106724225), (8202, 0.469920106724225), (8314, 0.469920

In [16]:
amarelo = references
vermelho = recs

count = 0
recs_dict = []
for reference in amarelo:
#     print('Referência: {}\t gêneros: {}'.format(refinedMyAlgo.movies[refinedMyAlgo.movies['movieId']==reference['movieID']].values[0][1], refinedMyAlgo.movies[refinedMyAlgo.movies['movieId']==reference['movieID']].values[0][2]))

    for movie in vermelho[count]:
        aux = {}

        movie_id = refinedMyAlgo.movies.loc[movie[0]].values[0]
        movie_title = refinedMyAlgo.movies.loc[movie[0]].values[1]
        movie_genres = refinedMyAlgo.movies.loc[movie[0]].values[2]
        movie_similarity = movie[1]
        movie_relevance = ((reference['rating']/5.0)+movie_similarity)/2

        aux['movie_id'] = movie_id
        aux['movie_title'] = movie_title
        aux['movie_genres'] = movie_genres
        aux['movie_similarity'] = movie_similarity
        aux['movie_relevance'] = movie_relevance

        recs_dict.append(aux)

        print('\tSim: {},\trelevance: {},\tmovieId: {},\ttitle: {}'.format(aux['movie_similarity'], aux['movie_relevance'], aux['movie_id'], aux['movie_title']))

    count=count+1

recs_dict = sorted(recs_dict, key = lambda i: i['movie_relevance'],reverse=True)

print("@ @ @ @ @ @ @ @ @ @ @ @ @")
recs_dict

	Sim: 0.5000000000000001,	relevance: 0.6827000304811133,	movieId: 2413,	title: Clue
	Sim: 0.5000000000000001,	relevance: 0.6827000304811133,	movieId: 4610,	title: January Man, The
	Sim: 0.5000000000000001,	relevance: 0.6827000304811133,	movieId: 4898,	title: Novocaine
	Sim: 0.5000000000000001,	relevance: 0.6827000304811133,	movieId: 5021,	title: Murder by Death
	Sim: 0.5000000000000001,	relevance: 0.6827000304811133,	movieId: 5938,	title: Deathtrap
	Sim: 0.47828245907659095,	relevance: 0.6718412600194088,	movieId: 1799,	title: Suicide Kings
	Sim: 0.47828245907659095,	relevance: 0.6718412600194088,	movieId: 27674,	title: 11:14
	Sim: 0.471993028450314,	relevance: 0.6686965447062703,	movieId: 1086,	title: Dial M for Murder
	Sim: 0.471993028450314,	relevance: 0.6686965447062703,	movieId: 1661,	title: Switchback
	Sim: 0.471993028450314,	relevance: 0.6686965447062703,	movieId: 2579,	title: Following
	Sim: 0.5,	relevance: 0.6808348486346194,	movieId: 257,	title: Just Cause
	Sim: 0.5,	relevanc

[{'movie_id': 3280,
  'movie_title': 'Baby, The',
  'movie_genres': 'Horror',
  'movie_similarity': 0.6572141853842605,
  'movie_relevance': 0.7433642685875197},
 {'movie_id': 93838,
  'movie_title': 'The Raid: Redemption',
  'movie_genres': 'Action, Crime',
  'movie_similarity': 0.5639156906809616,
  'movie_relevance': 0.6944099556775261},
 {'movie_id': 2413,
  'movie_title': 'Clue',
  'movie_genres': 'Comedy, Crime, Mystery, Thriller',
  'movie_similarity': 0.5000000000000001,
  'movie_relevance': 0.6827000304811133},
 {'movie_id': 4610,
  'movie_title': 'January Man, The',
  'movie_genres': 'Comedy, Crime, Mystery, Thriller',
  'movie_similarity': 0.5000000000000001,
  'movie_relevance': 0.6827000304811133},
 {'movie_id': 4898,
  'movie_title': 'Novocaine',
  'movie_genres': 'Comedy, Crime, Mystery, Thriller',
  'movie_similarity': 0.5000000000000001,
  'movie_relevance': 0.6827000304811133},
 {'movie_id': 5021,
  'movie_title': 'Murder by Death',
  'movie_genres': 'Comedy, Crime, M

In [14]:
candidates_list = refinedMyAlgo.get_relevance_score(recs=recs, references=references)
print(len(candidates_list))

for item in candidates_list[0:20]:
    print('relevance: {}, title:{}'.format(item['movie_relevance'], item['movie_title']))

100
relevance: 0.7433642685875197, title:Baby, The
relevance: 0.6944099556775261, title:The Raid: Redemption
relevance: 0.6827000304811133, title:Clue
relevance: 0.6827000304811133, title:January Man, The
relevance: 0.6827000304811133, title:Novocaine
relevance: 0.6827000304811133, title:Murder by Death
relevance: 0.6827000304811133, title:Deathtrap
relevance: 0.6808348486346194, title:Just Cause
relevance: 0.6808348486346194, title:Underneath
relevance: 0.6808348486346194, title:True Crime
relevance: 0.6808348486346194, title:Absolute Power
relevance: 0.6808348486346194, title:Saboteur
relevance: 0.6808348486346194, title:Mortal Thoughts
relevance: 0.6808348486346194, title:Eyes of Laura Mars
relevance: 0.6808348486346194, title:Jennifer 8
relevance: 0.6808348486346194, title:Obsession
relevance: 0.6808348486346194, title:Cat o' Nine Tails, The (Gatto a nove code, Il)
relevance: 0.6718412600194088, title:Suicide Kings
relevance: 0.6718412600194088, title:11:14
relevance: 0.66869654470

In [13]:
dist_i_R = refinedMyAlgo.calc_distance_item_in_list(item=candidates_list[24],this_list=candidates_list[0:5])
dist_i_R

0.6669543809016186

In [102]:
print(refinedMyAlgo.calc_diversity_score(actual_list=candidates_list[0:5], candidates_list=candidates_list[10:20]))

[{'div_score': 0.5152068879803822, 'idx': 0}, {'div_score': 0.7100494050081605, 'idx': 1}, {'div_score': 0.7100494050081605, 'idx': 2}, {'div_score': 0.7100494050081605, 'idx': 3}, {'div_score': 0.7100494050081605, 'idx': 4}, {'div_score': 0.7100494050081605, 'idx': 5}, {'div_score': 0.7100494050081605, 'idx': 6}, {'div_score': 0.7100494050081605, 'idx': 7}, {'div_score': 0.7100494050081605, 'idx': 8}, {'div_score': 0.7100494050081605, 'idx': 9}]


In [14]:
my_candidates = candidates_list.copy()
final_recs = refinedMyAlgo.diversify_recs_list(recs=my_candidates)
for item in final_recs:
    print('relevance: {}, title:{}'.format(item['movie_relevance'], item['movie_title']))

relevance: 0.6722114831090638, title:Vovka in the Kingdom of Far Far Away
relevance: 0.6686136914706732, title:Just Cause
relevance: 0.6655443188693326, title:Don't Be a Menace to South Central While Drinking Your Juice in the Hood
relevance: 0.6376398341285096, title:Princess and the Warrior, The (Krieger und die Kaiserin, Der)
relevance: 0.6555025187005001, title:Ninja Scroll (Jûbei ninpûchô)
relevance: 0.6686136914706732, title:Underneath
relevance: 0.6655443188693326, title:Striptease
relevance: 0.6600746872551528, title:Witness
relevance: 0.6534993064714851, title:Care Bears Movie, The
relevance: 0.6655443188693326, title:Carpool
